From 39d2fdb51233ed9b1aaf3adaa3267853f5e58c0f Mon Sep 17 00:00:00 2001 From: frreiss Date: Tue, 1 Nov 2016 23:00:17 -0700 Subject: [PATCH 0001/1204] [SPARK-17475][STREAMING] Delete CRC files if the filesystem doesn't use checksum files ## What changes were proposed in this pull request? When the metadata logs for various parts of Structured Streaming are stored on non-HDFS filesystems such as NFS or ext4, the HDFSMetadataLog class leaves hidden HDFS-style checksum (CRC) files in the log directory, one file per batch. This PR modifies HDFSMetadataLog so that it detects the use of a filesystem that doesn't use CRC files and removes the CRC files. ## How was this patch tested? Modified an existing test case in HDFSMetadataLogSuite to check whether HDFSMetadataLog correctly removes CRC files on the local POSIX filesystem. Ran the entire regression suite. Author: frreiss Closes #15027 from frreiss/fred-17475. (cherry picked from commit 620da3b4828b3580c7ed7339b2a07938e6be1bb1) Signed-off-by: Reynold Xin --- .../spark/sql/execution/streaming/HDFSMetadataLog.scala | 5 +++++ .../sql/execution/streaming/HDFSMetadataLogSuite.scala | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index c7235320fd6bd..9a0f87cf0498c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -148,6 +148,11 @@ class HDFSMetadataLog[T: ClassTag](sparkSession: SparkSession, path: String) // It will fail if there is an existing file (someone has committed the batch) logDebug(s"Attempting to write log #${batchIdToPath(batchId)}") fileManager.rename(tempPath, batchIdToPath(batchId)) + + // SPARK-17475: HDFSMetadataLog should not leak CRC files + // If the underlying filesystem didn't rename the CRC file, delete it. + val crcPath = new Path(tempPath.getParent(), s".${tempPath.getName()}.crc") + if (fileManager.exists(crcPath)) fileManager.delete(crcPath) return } catch { case e: IOException if isFileAlreadyExistsException(e) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 9c1d26dcb2241..d03e08d9a576c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -119,6 +119,12 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { assert(metadataLog.get(1).isEmpty) assert(metadataLog.get(2).isDefined) assert(metadataLog.getLatest().get._1 == 2) + + // There should be exactly one file, called "2", in the metadata directory. + // This check also tests for regressions of SPARK-17475 + val allFiles = new File(metadataLog.metadataPath.toString).listFiles().toSeq + assert(allFiles.size == 1) + assert(allFiles(0).getName() == "2") } } From e6509c2459e7ece3c3c6bcd143b8cc71f8f4d5c8 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Wed, 2 Nov 2016 14:15:10 +0800 Subject: [PATCH 0002/1204] [SPARK-18183][SPARK-18184] Fix INSERT [INTO|OVERWRITE] TABLE ... 
PARTITION for Datasource tables There are a couple issues with the current 2.1 behavior when inserting into Datasource tables with partitions managed by Hive. (1) OVERWRITE TABLE ... PARTITION will actually overwrite the entire table instead of just the specified partition. (2) INSERT|OVERWRITE does not work with partitions that have custom locations. This PR fixes both of these issues for Datasource tables managed by Hive. The behavior for legacy tables or when `manageFilesourcePartitions = false` is unchanged. There is one other issue in that INSERT OVERWRITE with dynamic partitions will overwrite the entire table instead of just the updated partitions, but this behavior is pretty complicated to implement for Datasource tables. We should address that in a future release. Unit tests. Author: Eric Liang Closes #15705 from ericl/sc-4942. (cherry picked from commit abefe2ec428dc24a4112c623fb6fbe4b2ca60a2b) Signed-off-by: Reynold Xin --- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 9 +++- .../plans/logical/basicLogicalOperators.scala | 19 ++++++- .../sql/catalyst/parser/PlanParserSuite.scala | 15 ++++-- .../apache/spark/sql/DataFrameWriter.scala | 4 +- .../datasources/CatalogFileIndex.scala | 5 +- .../datasources/DataSourceStrategy.scala | 30 +++++++++-- .../InsertIntoDataSourceCommand.scala | 6 +-- .../spark/sql/hive/HiveStrategies.scala | 3 +- .../CreateHiveTableAsSelectCommand.scala | 5 +- .../PartitionProviderCompatibilitySuite.scala | 52 +++++++++++++++++++ 11 files changed, 129 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 66e52ca68af19..e901683be6854 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -367,7 +367,7 @@ package object dsl { def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan = InsertIntoTable( analysis.UnresolvedRelation(TableIdentifier(tableName)), - Map.empty, logicalPlan, overwrite, false) + Map.empty, logicalPlan, OverwriteOptions(overwrite), false) def as(alias: String): LogicalPlan = logicalPlan match { case UnresolvedRelation(tbl, _) => UnresolvedRelation(tbl, Option(alias)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 38e9bb6c162ad..ac1577b3abb4d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -177,12 +177,19 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { throw new ParseException(s"Dynamic partitions do not support IF NOT EXISTS. 
Specified " + "partitions with value: " + dynamicPartitionKeys.keys.mkString("[", ",", "]"), ctx) } + val overwrite = ctx.OVERWRITE != null + val overwritePartition = + if (overwrite && partitionKeys.nonEmpty && dynamicPartitionKeys.isEmpty) { + Some(partitionKeys.map(t => (t._1, t._2.get))) + } else { + None + } InsertIntoTable( UnresolvedRelation(tableIdent, None), partitionKeys, query, - ctx.OVERWRITE != null, + OverwriteOptions(overwrite, overwritePartition), ctx.EXISTS != null) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a48974c6322ad..7a15c2285d584 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -21,6 +21,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTypes import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ @@ -345,18 +346,32 @@ case class BroadcastHint(child: LogicalPlan) extends UnaryNode { override lazy val statistics: Statistics = super.statistics.copy(isBroadcastable = true) } +/** + * Options for writing new data into a table. + * + * @param enabled whether to overwrite existing data in the table. + * @param specificPartition only data in the specified partition will be overwritten. + */ +case class OverwriteOptions( + enabled: Boolean, + specificPartition: Option[CatalogTypes.TablePartitionSpec] = None) { + if (specificPartition.isDefined) { + assert(enabled, "Overwrite must be enabled when specifying a partition to overwrite.") + } +} + case class InsertIntoTable( table: LogicalPlan, partition: Map[String, Option[String]], child: LogicalPlan, - overwrite: Boolean, + overwrite: OverwriteOptions, ifNotExists: Boolean) extends LogicalPlan { override def children: Seq[LogicalPlan] = child :: Nil override def output: Seq[Attribute] = Seq.empty - assert(overwrite || !ifNotExists) + assert(overwrite.enabled || !ifNotExists) assert(partition.values.forall(_.nonEmpty) || !ifNotExists) override lazy val resolved: Boolean = childrenResolved && table.resolved diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index ca86304d4d400..7400f3430e99c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -180,7 +180,16 @@ class PlanParserSuite extends PlanTest { partition: Map[String, Option[String]], overwrite: Boolean = false, ifNotExists: Boolean = false): LogicalPlan = - InsertIntoTable(table("s"), partition, plan, overwrite, ifNotExists) + InsertIntoTable( + table("s"), partition, plan, + OverwriteOptions( + overwrite, + if (overwrite && partition.nonEmpty) { + Some(partition.map(kv => (kv._1, kv._2.get))) + } else { + None + }), + ifNotExists) // Single inserts assertEqual(s"insert overwrite table s $sql", @@ -196,9 +205,9 @@ class PlanParserSuite extends PlanTest { val 
plan2 = table("t").where('x > 5).select(star()) assertEqual("from t insert into s select * limit 1 insert into u select * where x > 5", InsertIntoTable( - table("s"), Map.empty, plan.limit(1), overwrite = false, ifNotExists = false).union( + table("s"), Map.empty, plan.limit(1), OverwriteOptions(false), ifNotExists = false).union( InsertIntoTable( - table("u"), Map.empty, plan2, overwrite = false, ifNotExists = false))) + table("u"), Map.empty, plan2, OverwriteOptions(false), ifNotExists = false))) } test ("insert with if not exists") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 11dd1df909938..700f4835ac89a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -25,7 +25,7 @@ import org.apache.spark.annotation.InterfaceStability import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, Union} +import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, OverwriteOptions, Union} import org.apache.spark.sql.execution.command.AlterTableRecoverPartitionsCommand import org.apache.spark.sql.execution.datasources.{CaseInsensitiveMap, CreateTable, DataSource, HadoopFsRelation} import org.apache.spark.sql.types.StructType @@ -259,7 +259,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { table = UnresolvedRelation(tableIdent), partition = Map.empty[String, Option[String]], child = df.logicalPlan, - overwrite = mode == SaveMode.Overwrite, + overwrite = OverwriteOptions(mode == SaveMode.Overwrite), ifNotExists = false)).toRdd } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala index 092aabc89a36c..443a2ec033a98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala @@ -67,7 +67,10 @@ class CatalogFileIndex( val selectedPartitions = sparkSession.sessionState.catalog.listPartitionsByFilter( table.identifier, filters) val partitions = selectedPartitions.map { p => - PartitionPath(p.toRow(partitionSchema), p.storage.locationUri.get) + val path = new Path(p.storage.locationUri.get) + val fs = path.getFileSystem(hadoopConf) + PartitionPath( + p.toRow(partitionSchema), path.makeQualified(fs.getUri, fs.getWorkingDirectory)) } val partitionSpec = PartitionSpec(partitionSchema, partitions) new PrunedInMemoryFileIndex( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 34b77cab65def..47c1f9d3fac1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources import scala.collection.mutable.ArrayBuffer +import org.apache.hadoop.fs.Path + import org.apache.spark.internal.Logging import 
org.apache.spark.rdd.RDD import org.apache.spark.sql._ @@ -174,14 +176,32 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { case LogicalRelation(r: HadoopFsRelation, _, _) => r.location.rootPaths }.flatten - val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append - if (overwrite && inputPaths.contains(outputPath)) { + val mode = if (overwrite.enabled) SaveMode.Overwrite else SaveMode.Append + if (overwrite.enabled && inputPaths.contains(outputPath)) { throw new AnalysisException( "Cannot overwrite a path that is also being read from.") } + val overwritingSinglePartition = (overwrite.specificPartition.isDefined && + t.sparkSession.sessionState.conf.manageFilesourcePartitions && + l.catalogTable.get.partitionProviderIsHive) + + val effectiveOutputPath = if (overwritingSinglePartition) { + val partition = t.sparkSession.sessionState.catalog.getPartition( + l.catalogTable.get.identifier, overwrite.specificPartition.get) + new Path(partition.storage.locationUri.get) + } else { + outputPath + } + + val effectivePartitionSchema = if (overwritingSinglePartition) { + Nil + } else { + query.resolve(t.partitionSchema, t.sparkSession.sessionState.analyzer.resolver) + } + def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = { - if (l.catalogTable.isDefined && + if (l.catalogTable.isDefined && updatedPartitions.nonEmpty && l.catalogTable.get.partitionColumnNames.nonEmpty && l.catalogTable.get.partitionProviderIsHive) { val metastoreUpdater = AlterTableAddPartitionCommand( @@ -194,8 +214,8 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { } val insertCmd = InsertIntoHadoopFsRelationCommand( - outputPath, - query.resolve(t.partitionSchema, t.sparkSession.sessionState.analyzer.resolver), + effectiveOutputPath, + effectivePartitionSchema, t.bucketSpec, t.fileFormat, refreshPartitionsCallback, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoDataSourceCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoDataSourceCommand.scala index b2ff68a833fea..2eba1e9986acd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoDataSourceCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoDataSourceCommand.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OverwriteOptions} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.sources.InsertableRelation @@ -30,7 +30,7 @@ import org.apache.spark.sql.sources.InsertableRelation case class InsertIntoDataSourceCommand( logicalRelation: LogicalRelation, query: LogicalPlan, - overwrite: Boolean) + overwrite: OverwriteOptions) extends RunnableCommand { override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query) @@ -40,7 +40,7 @@ case class InsertIntoDataSourceCommand( val data = Dataset.ofRows(sparkSession, query) // Apply the schema of the existing table to the new data. val df = sparkSession.internalCreateDataFrame(data.queryExecution.toRdd, logicalRelation.schema) - relation.insert(df, overwrite) + relation.insert(df, overwrite.enabled) // Invalidate the cache. 
sparkSession.sharedState.cacheManager.invalidateCache(logicalRelation) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 9d2930948d6ba..ce1e3eb1a5bc9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -46,7 +46,8 @@ private[hive] trait HiveStrategies { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case logical.InsertIntoTable( table: MetastoreRelation, partition, child, overwrite, ifNotExists) => - InsertIntoHiveTable(table, partition, planLater(child), overwrite, ifNotExists) :: Nil + InsertIntoHiveTable( + table, partition, planLater(child), overwrite.enabled, ifNotExists) :: Nil case CreateTable(tableDesc, mode, Some(query)) if tableDesc.provider.get == "hive" => val newTableDesc = if (tableDesc.storage.serde.isEmpty) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala index ef5a5a001fb6f..cac43597aef21 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala @@ -21,7 +21,7 @@ import scala.util.control.NonFatal import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.catalog.CatalogTable -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, OverwriteOptions} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.hive.MetastoreRelation @@ -88,7 +88,8 @@ case class CreateHiveTableAsSelectCommand( } else { try { sparkSession.sessionState.executePlan(InsertIntoTable( - metastoreRelation, Map(), query, overwrite = true, ifNotExists = false)).toRdd + metastoreRelation, Map(), query, overwrite = OverwriteOptions(true), + ifNotExists = false)).toRdd } catch { case NonFatal(e) => // drop the created table. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala index 5f16960fb1496..ac435bf6195b0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala @@ -134,4 +134,56 @@ class PartitionProviderCompatibilitySuite } } } + + test("insert overwrite partition of legacy datasource table overwrites entire table") { + withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { + withTable("test") { + withTempDir { dir => + setupPartitionedDatasourceTable("test", dir) + spark.sql( + """insert overwrite table test + |partition (partCol=1) + |select * from range(100)""".stripMargin) + assert(spark.sql("select * from test").count() == 100) + + // Dynamic partitions case + spark.sql("insert overwrite table test select id, id from range(10)".stripMargin) + assert(spark.sql("select * from test").count() == 10) + } + } + } + } + + test("insert overwrite partition of new datasource table overwrites just partition") { + withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") { + withTable("test") { + withTempDir { dir => + setupPartitionedDatasourceTable("test", dir) + sql("msck repair table test") + spark.sql( + """insert overwrite table test + |partition (partCol=1) + |select * from range(100)""".stripMargin) + assert(spark.sql("select * from test").count() == 104) + + // Test overwriting a partition that has a custom location + withTempDir { dir2 => + sql( + s"""alter table test partition (partCol=1) + |set location '${dir2.getAbsolutePath}'""".stripMargin) + assert(sql("select * from test").count() == 4) + sql( + """insert overwrite table test + |partition (partCol=1) + |select * from range(30)""".stripMargin) + sql( + """insert overwrite table test + |partition (partCol=1) + |select * from range(20)""".stripMargin) + assert(sql("select * from test").count() == 24) + } + } + } + } + } } From 85dd073743946383438aabb9f1281e6075f25cc5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 1 Nov 2016 23:37:03 -0700 Subject: [PATCH 0003/1204] [SPARK-18192] Support all file formats in structured streaming ## What changes were proposed in this pull request? This patch adds support for all file formats in structured streaming sinks. This is actually a very small change thanks to all the previous refactoring done using the new internal commit protocol API. ## How was this patch tested? Updated FileStreamSinkSuite to add test cases for json, text, and parquet. Author: Reynold Xin Closes #15711 from rxin/SPARK-18192. 
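For reference, a minimal usage sketch of what this change enables (illustrative only, not part of the patch; it assumes an active `SparkSession` named `spark`, and the host, port, and paths are placeholders):

```scala
// With this change, any FileFormat-based sink works for streaming writes, not just parquet.
val lines = spark.readStream
  .format("socket")                 // simple built-in source, used here only for illustration
  .option("host", "localhost")
  .option("port", "9999")
  .load()

val query = lines.writeStream
  .format("json")                   // previously the file sink accepted only parquet
  .option("checkpointLocation", "/tmp/checkpoint-dir")
  .start("/tmp/output-dir")
```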
(cherry picked from commit a36653c5b7b2719f8bfddf4ddfc6e1b828ac9af1) Signed-off-by: Reynold Xin --- .../execution/datasources/DataSource.scala | 8 +-- .../sql/streaming/FileStreamSinkSuite.scala | 62 +++++++++---------- 2 files changed, 32 insertions(+), 38 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index d980e6a15aabe..3f956c427655e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat @@ -37,7 +36,6 @@ import org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider import org.apache.spark.sql.execution.datasources.json.JsonFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{CalendarIntervalType, StructType} @@ -292,7 +290,7 @@ case class DataSource( case s: StreamSinkProvider => s.createSink(sparkSession.sqlContext, options, partitionColumns, outputMode) - case parquet: parquet.ParquetFileFormat => + case fileFormat: FileFormat => val caseInsensitiveOptions = new CaseInsensitiveMap(options) val path = caseInsensitiveOptions.getOrElse("path", { throw new IllegalArgumentException("'path' is not specified") @@ -301,7 +299,7 @@ case class DataSource( throw new IllegalArgumentException( s"Data source $className does not support $outputMode output mode") } - new FileStreamSink(sparkSession, path, parquet, partitionColumns, options) + new FileStreamSink(sparkSession, path, fileFormat, partitionColumns, options) case _ => throw new UnsupportedOperationException( @@ -516,7 +514,7 @@ case class DataSource( val plan = data.logicalPlan plan.resolve(name :: Nil, data.sparkSession.sessionState.analyzer.resolver).getOrElse { throw new AnalysisException( - s"Unable to resolve ${name} given [${plan.output.map(_.name).mkString(", ")}]") + s"Unable to resolve $name given [${plan.output.map(_.name).mkString(", ")}]") }.asInstanceOf[Attribute] } // For partitioned relation r, r.schema's column ordering can be different from the column diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 902cf05344716..0f140f94f630e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.streaming -import org.apache.spark.sql._ +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.execution.DataSourceScanExec import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.streaming.{MemoryStream, MetadataLogFileIndex} @@ 
-142,42 +142,38 @@ class FileStreamSinkSuite extends StreamTest { } } - test("FileStreamSink - supported formats") { - def testFormat(format: Option[String]): Unit = { - val inputData = MemoryStream[Int] - val ds = inputData.toDS() + test("FileStreamSink - parquet") { + testFormat(None) // should not throw error as default format parquet when not specified + testFormat(Some("parquet")) + } - val outputDir = Utils.createTempDir(namePrefix = "stream.output").getCanonicalPath - val checkpointDir = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath + test("FileStreamSink - text") { + testFormat(Some("text")) + } - var query: StreamingQuery = null + test("FileStreamSink - json") { + testFormat(Some("text")) + } - try { - val writer = - ds.map(i => (i, i * 1000)) - .toDF("id", "value") - .writeStream - if (format.nonEmpty) { - writer.format(format.get) - } - query = writer - .option("checkpointLocation", checkpointDir) - .start(outputDir) - } finally { - if (query != null) { - query.stop() - } - } - } + def testFormat(format: Option[String]): Unit = { + val inputData = MemoryStream[Int] + val ds = inputData.toDS() - testFormat(None) // should not throw error as default format parquet when not specified - testFormat(Some("parquet")) - val e = intercept[UnsupportedOperationException] { - testFormat(Some("text")) - } - Seq("text", "not support", "stream").foreach { s => - assert(e.getMessage.contains(s)) + val outputDir = Utils.createTempDir(namePrefix = "stream.output").getCanonicalPath + val checkpointDir = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath + + var query: StreamingQuery = null + + try { + val writer = ds.map(i => (i, i * 1000)).toDF("id", "value").writeStream + if (format.nonEmpty) { + writer.format(format.get) + } + query = writer.option("checkpointLocation", checkpointDir).start(outputDir) + } finally { + if (query != null) { + query.stop() + } } } - } From 4c4bf87acf2516a72b59f4e760413f80640dca1e Mon Sep 17 00:00:00 2001 From: CodingCat Date: Tue, 1 Nov 2016 23:39:53 -0700 Subject: [PATCH 0004/1204] [SPARK-18144][SQL] logging StreamingQueryListener$QueryStartedEvent ## What changes were proposed in this pull request? The PR fixes the bug that the QueryStartedEvent is not logged the postToAll() in the original code is actually calling StreamingQueryListenerBus.postToAll() which has no listener at all....we shall post by sparkListenerBus.postToAll(s) and this.postToAll() to trigger local listeners as well as the listeners registered in LiveListenerBus zsxwing ## How was this patch tested? The following snapshot shows that QueryStartedEvent has been logged correctly ![image](https://cloud.githubusercontent.com/assets/678008/19821553/007a7d28-9d2d-11e6-9f13-49851559cdaa.png) Author: CodingCat Closes #15675 from CodingCat/SPARK-18144. 
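For reference, a minimal sketch of the listener registration this fix affects (illustrative only, not part of the patch; it assumes an active `SparkSession` named `spark`):

```scala
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

// With this fix, onQueryStarted fires (exactly once per started query) for listeners
// registered here, and the QueryStartedEvent is also posted to the LiveListenerBus for logging.
spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit =
    println(s"query started: $event")
  override def onQueryProgress(event: QueryProgressEvent): Unit = ()
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
    println(s"query terminated: $event")
})
```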
(cherry picked from commit 85c5424d466f4a5765c825e0e2ab30da97611285) Signed-off-by: Shixiong Zhu --- .../streaming/StreamingQueryListenerBus.scala | 10 +++++++++- .../spark/sql/streaming/StreamingQuerySuite.scala | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala index fc2190d39da4f..22e4c6380fcd5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala @@ -41,6 +41,8 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) def post(event: StreamingQueryListener.Event) { event match { case s: QueryStartedEvent => + sparkListenerBus.post(s) + // post to local listeners to trigger callbacks postToAll(s) case _ => sparkListenerBus.post(event) @@ -50,7 +52,13 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) override def onOtherEvent(event: SparkListenerEvent): Unit = { event match { case e: StreamingQueryListener.Event => - postToAll(e) + // SPARK-18144: we broadcast QueryStartedEvent to all listeners attached to this bus + // synchronously and the ones attached to LiveListenerBus asynchronously. Therefore, + // we need to ignore QueryStartedEvent if this method is called within SparkListenerBus + // thread + if (!LiveListenerBus.withinListenerThread.value || !e.isInstanceOf[QueryStartedEvent]) { + postToAll(e) + } case _ => } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 464c443beb6e7..31b7fe0b04da9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -290,7 +290,10 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging { // A StreamingQueryListener that gets the query status after the first completed trigger val listener = new StreamingQueryListener { @volatile var firstStatus: StreamingQueryStatus = null - override def onQueryStarted(queryStarted: QueryStartedEvent): Unit = { } + @volatile var queryStartedEvent = 0 + override def onQueryStarted(queryStarted: QueryStartedEvent): Unit = { + queryStartedEvent += 1 + } override def onQueryProgress(queryProgress: QueryProgressEvent): Unit = { if (firstStatus == null) firstStatus = queryProgress.queryStatus } @@ -303,6 +306,8 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging { q.processAllAvailable() eventually(timeout(streamingTimeout)) { assert(listener.firstStatus != null) + // test if QueryStartedEvent callback is called for only once + assert(listener.queryStartedEvent === 1) } listener.firstStatus } finally { From 3b624bedf0f0ecd5dcfcc262a3ca8b4e33662533 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 2 Nov 2016 00:08:30 -0700 Subject: [PATCH 0005/1204] [SPARK-17532] Add lock debugging info to thread dumps. ## What changes were proposed in this pull request? This adds information to the web UI thread dump page about the JVM locks held by threads and the locks that threads are blocked waiting to acquire. This should help find cases where lock contention is causing Spark applications to run slowly. 
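For context, a minimal standalone sketch of the `java.lang.management` calls this change builds on (illustrative only, not part of the patch):

```scala
import java.lang.management.ManagementFactory

// Ask the JVM for all live threads, including their locked monitors and ownable
// synchronizers; this is the same call Utils.getThreadDump uses below.
val infos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, true)

infos.filter(_ != null).foreach { info =>
  // Locks currently held by this thread (monitors plus j.u.c. synchronizers).
  val held = (info.getLockedMonitors.map(_.toString) ++
    info.getLockedSynchronizers.map(_.toString)).toSet
  val heldStr = held.mkString(", ")
  // Lock this thread is blocked on, if any, and the id of the owning thread (-1 if none).
  val blockedOn = Option(info.getLockInfo).map(_.toString).getOrElse("<none>")
  println(s"${info.getThreadId} ${info.getThreadName} ${info.getThreadState} " +
    s"held=[$heldStr] blockedOn=$blockedOn owner=${info.getLockOwnerId}")
}
```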
## How was this patch tested? Tested by applying this patch and viewing the change in the web UI. ![thread-lock-info](https://cloud.githubusercontent.com/assets/87915/18493057/6e5da870-79c3-11e6-8c20-f54c18a37544.png) Additions: - A "Thread Locking" column with the locks held by the thread or that are blocking the thread - Links from the a blocked thread to the thread holding the lock - Stack frames show where threads are inside `synchronized` blocks, "holding Monitor(...)" Author: Ryan Blue Closes #15088 from rdblue/SPARK-17532-add-thread-lock-info. (cherry picked from commit 2dc048081668665f85623839d5f663b402e42555) Signed-off-by: Reynold Xin --- .../org/apache/spark/ui/static/table.js | 3 +- .../ui/exec/ExecutorThreadDumpPage.scala | 12 +++++++ .../apache/spark/util/ThreadStackTrace.scala | 6 +++- .../scala/org/apache/spark/util/Utils.scala | 34 ++++++++++++++++--- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/table.js b/core/src/main/resources/org/apache/spark/ui/static/table.js index 14b06bfe860ed..0315ebf5c48a9 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/table.js +++ b/core/src/main/resources/org/apache/spark/ui/static/table.js @@ -36,7 +36,7 @@ function toggleThreadStackTrace(threadId, forceAdd) { if (stackTrace.length == 0) { var stackTraceText = $('#' + threadId + "_td_stacktrace").html() var threadCell = $("#thread_" + threadId + "_tr") - threadCell.after("
" +
+        threadCell.after("
" +
             stackTraceText +  "
") } else { if (!forceAdd) { @@ -73,6 +73,7 @@ function onMouseOverAndOut(threadId) { $("#" + threadId + "_td_id").toggleClass("threaddump-td-mouseover"); $("#" + threadId + "_td_name").toggleClass("threaddump-td-mouseover"); $("#" + threadId + "_td_state").toggleClass("threaddump-td-mouseover"); + $("#" + threadId + "_td_locking").toggleClass("threaddump-td-mouseover"); } function onSearchStringChange() { diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala index a0ef80d9bdae0..c6a07445f2a35 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala @@ -48,6 +48,16 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage } }.map { thread => val threadId = thread.threadId + val blockedBy = thread.blockedByThreadId match { + case Some(blockedByThreadId) => + + case None => Text("") + } + val heldLocks = thread.holdingLocks.mkString(", ") + {threadId} {thread.threadName} {thread.threadState} + {blockedBy}{heldLocks} {thread.stackTrace} } @@ -86,6 +97,7 @@ private[ui] class ExecutorThreadDumpPage(parent: ExecutorsTab) extends WebUIPage Thread ID Thread Name Thread State + Thread Locks {dumpRows} diff --git a/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala index d4e0ad93b966a..b1217980faf1f 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadStackTrace.scala @@ -24,4 +24,8 @@ private[spark] case class ThreadStackTrace( threadId: Long, threadName: String, threadState: Thread.State, - stackTrace: String) + stackTrace: String, + blockedByThreadId: Option[Long], + blockedByLock: String, + holdingLocks: Seq[String]) + diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 6027b07c0fee8..22c28fba2087e 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -18,7 +18,7 @@ package org.apache.spark.util import java.io._ -import java.lang.management.ManagementFactory +import java.lang.management.{LockInfo, ManagementFactory, MonitorInfo} import java.net._ import java.nio.ByteBuffer import java.nio.channels.Channels @@ -2096,15 +2096,41 @@ private[spark] object Utils extends Logging { } } + private implicit class Lock(lock: LockInfo) { + def lockString: String = { + lock match { + case monitor: MonitorInfo => + s"Monitor(${lock.getClassName}@${lock.getIdentityHashCode}})" + case _ => + s"Lock(${lock.getClassName}@${lock.getIdentityHashCode}})" + } + } + } + /** Return a thread dump of all threads' stacktraces. Used to capture dumps for the web UI */ def getThreadDump(): Array[ThreadStackTrace] = { // We need to filter out null values here because dumpAllThreads() may return null array // elements for threads that are dead / don't exist. 
val threadInfos = ManagementFactory.getThreadMXBean.dumpAllThreads(true, true).filter(_ != null) threadInfos.sortBy(_.getThreadId).map { case threadInfo => - val stackTrace = threadInfo.getStackTrace.map(_.toString).mkString("\n") - ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName, - threadInfo.getThreadState, stackTrace) + val monitors = threadInfo.getLockedMonitors.map(m => m.getLockedStackFrame -> m).toMap + val stackTrace = threadInfo.getStackTrace.map { frame => + monitors.get(frame) match { + case Some(monitor) => + monitor.getLockedStackFrame.toString + s" => holding ${monitor.lockString}" + case None => + frame.toString + } + }.mkString("\n") + + // use a set to dedup re-entrant locks that are held at multiple places + val heldLocks = (threadInfo.getLockedSynchronizers.map(_.lockString) + ++ threadInfo.getLockedMonitors.map(_.lockString) + ).toSet + + ThreadStackTrace(threadInfo.getThreadId, threadInfo.getThreadName, threadInfo.getThreadState, + stackTrace, if (threadInfo.getLockOwnerId < 0) None else Some(threadInfo.getLockOwnerId), + Option(threadInfo.getLockInfo).map(_.lockString).getOrElse(""), heldLocks.toSeq) } } From ab8da1413836591fecbc75a2515875bf3e50527f Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Wed, 2 Nov 2016 09:10:34 +0000 Subject: [PATCH 0006/1204] [SPARK-18198][DOC][STREAMING] Highlight code snippets ## What changes were proposed in this pull request? This patch uses `{% highlight lang %}...{% endhighlight %}` to highlight code snippets in the `Structured Streaming Kafka010 integration doc` and the `Spark Streaming Kafka010 integration doc`. This patch consists of two commits: - the first commit fixes only the leading spaces -- this is large - the second commit adds the highlight instructions -- this is much simpler and easier to review ## How was this patch tested? SKIP_API=1 jekyll build ## Screenshots **Before** ![snip20161101_3](https://cloud.githubusercontent.com/assets/15843379/19894258/47746524-a087-11e6-9a2a-7bff2d428d44.png) **After** ![snip20161101_1](https://cloud.githubusercontent.com/assets/15843379/19894324/8bebcd1e-a087-11e6-835b-88c4d2979cfa.png) Author: Liwei Lin Closes #15715 from lw-lin/doc-highlight-code-snippet. (cherry picked from commit 98ede49496d0d7b4724085083d4f24436b92a7bf) Signed-off-by: Sean Owen --- docs/streaming-kafka-0-10-integration.md | 391 +++++++++--------- .../structured-streaming-kafka-integration.md | 156 +++---- 2 files changed, 287 insertions(+), 260 deletions(-) diff --git a/docs/streaming-kafka-0-10-integration.md b/docs/streaming-kafka-0-10-integration.md index c1ef396907db7..b645d3c3a4b53 100644 --- a/docs/streaming-kafka-0-10-integration.md +++ b/docs/streaming-kafka-0-10-integration.md @@ -17,69 +17,72 @@ For Scala/Java applications using SBT/Maven project definitions, link your strea
- import org.apache.kafka.clients.consumer.ConsumerRecord - import org.apache.kafka.common.serialization.StringDeserializer - import org.apache.spark.streaming.kafka010._ - import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent - import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe - - val kafkaParams = Map[String, Object]( - "bootstrap.servers" -> "localhost:9092,anotherhost:9092", - "key.deserializer" -> classOf[StringDeserializer], - "value.deserializer" -> classOf[StringDeserializer], - "group.id" -> "use_a_separate_group_id_for_each_stream", - "auto.offset.reset" -> "latest", - "enable.auto.commit" -> (false: java.lang.Boolean) - ) - - val topics = Array("topicA", "topicB") - val stream = KafkaUtils.createDirectStream[String, String]( - streamingContext, - PreferConsistent, - Subscribe[String, String](topics, kafkaParams) - ) - - stream.map(record => (record.key, record.value)) - +{% highlight scala %} +import org.apache.kafka.clients.consumer.ConsumerRecord +import org.apache.kafka.common.serialization.StringDeserializer +import org.apache.spark.streaming.kafka010._ +import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent +import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe + +val kafkaParams = Map[String, Object]( + "bootstrap.servers" -> "localhost:9092,anotherhost:9092", + "key.deserializer" -> classOf[StringDeserializer], + "value.deserializer" -> classOf[StringDeserializer], + "group.id" -> "use_a_separate_group_id_for_each_stream", + "auto.offset.reset" -> "latest", + "enable.auto.commit" -> (false: java.lang.Boolean) +) + +val topics = Array("topicA", "topicB") +val stream = KafkaUtils.createDirectStream[String, String]( + streamingContext, + PreferConsistent, + Subscribe[String, String](topics, kafkaParams) +) + +stream.map(record => (record.key, record.value)) +{% endhighlight %} Each item in the stream is a [ConsumerRecord](http://kafka.apache.org/0100/javadoc/org/apache/kafka/clients/consumer/ConsumerRecord.html)
- import java.util.*; - import org.apache.spark.SparkConf; - import org.apache.spark.TaskContext; - import org.apache.spark.api.java.*; - import org.apache.spark.api.java.function.*; - import org.apache.spark.streaming.api.java.*; - import org.apache.spark.streaming.kafka010.*; - import org.apache.kafka.clients.consumer.ConsumerRecord; - import org.apache.kafka.common.TopicPartition; - import org.apache.kafka.common.serialization.StringDeserializer; - import scala.Tuple2; - - Map kafkaParams = new HashMap<>(); - kafkaParams.put("bootstrap.servers", "localhost:9092,anotherhost:9092"); - kafkaParams.put("key.deserializer", StringDeserializer.class); - kafkaParams.put("value.deserializer", StringDeserializer.class); - kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream"); - kafkaParams.put("auto.offset.reset", "latest"); - kafkaParams.put("enable.auto.commit", false); - - Collection topics = Arrays.asList("topicA", "topicB"); - - final JavaInputDStream> stream = - KafkaUtils.createDirectStream( - streamingContext, - LocationStrategies.PreferConsistent(), - ConsumerStrategies.Subscribe(topics, kafkaParams) - ); - - stream.mapToPair( - new PairFunction, String, String>() { - @Override - public Tuple2 call(ConsumerRecord record) { - return new Tuple2<>(record.key(), record.value()); - } - }) +{% highlight java %} +import java.util.*; +import org.apache.spark.SparkConf; +import org.apache.spark.TaskContext; +import org.apache.spark.api.java.*; +import org.apache.spark.api.java.function.*; +import org.apache.spark.streaming.api.java.*; +import org.apache.spark.streaming.kafka010.*; +import org.apache.kafka.clients.consumer.ConsumerRecord; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.serialization.StringDeserializer; +import scala.Tuple2; + +Map kafkaParams = new HashMap<>(); +kafkaParams.put("bootstrap.servers", "localhost:9092,anotherhost:9092"); +kafkaParams.put("key.deserializer", StringDeserializer.class); +kafkaParams.put("value.deserializer", StringDeserializer.class); +kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream"); +kafkaParams.put("auto.offset.reset", "latest"); +kafkaParams.put("enable.auto.commit", false); + +Collection topics = Arrays.asList("topicA", "topicB"); + +final JavaInputDStream> stream = + KafkaUtils.createDirectStream( + streamingContext, + LocationStrategies.PreferConsistent(), + ConsumerStrategies.Subscribe(topics, kafkaParams) + ); + +stream.mapToPair( + new PairFunction, String, String>() { + @Override + public Tuple2 call(ConsumerRecord record) { + return new Tuple2<>(record.key(), record.value()); + } + }) +{% endhighlight %}
@@ -109,32 +112,35 @@ If you have a use case that is better suited to batch processing, you can create
- // Import dependencies and create kafka params as in Create Direct Stream above - - val offsetRanges = Array( - // topic, partition, inclusive starting offset, exclusive ending offset - OffsetRange("test", 0, 0, 100), - OffsetRange("test", 1, 0, 100) - ) +{% highlight scala %} +// Import dependencies and create kafka params as in Create Direct Stream above - val rdd = KafkaUtils.createRDD[String, String](sparkContext, kafkaParams, offsetRanges, PreferConsistent) +val offsetRanges = Array( + // topic, partition, inclusive starting offset, exclusive ending offset + OffsetRange("test", 0, 0, 100), + OffsetRange("test", 1, 0, 100) +) +val rdd = KafkaUtils.createRDD[String, String](sparkContext, kafkaParams, offsetRanges, PreferConsistent) +{% endhighlight %}
- // Import dependencies and create kafka params as in Create Direct Stream above - - OffsetRange[] offsetRanges = { - // topic, partition, inclusive starting offset, exclusive ending offset - OffsetRange.create("test", 0, 0, 100), - OffsetRange.create("test", 1, 0, 100) - }; - - JavaRDD> rdd = KafkaUtils.createRDD( - sparkContext, - kafkaParams, - offsetRanges, - LocationStrategies.PreferConsistent() - ); +{% highlight java %} +// Import dependencies and create kafka params as in Create Direct Stream above + +OffsetRange[] offsetRanges = { + // topic, partition, inclusive starting offset, exclusive ending offset + OffsetRange.create("test", 0, 0, 100), + OffsetRange.create("test", 1, 0, 100) +}; + +JavaRDD> rdd = KafkaUtils.createRDD( + sparkContext, + kafkaParams, + offsetRanges, + LocationStrategies.PreferConsistent() +); +{% endhighlight %}
@@ -144,29 +150,33 @@ Note that you cannot use `PreferBrokers`, because without the stream there is no
- stream.foreachRDD { rdd => - val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges - rdd.foreachPartition { iter => - val o: OffsetRange = offsetRanges(TaskContext.get.partitionId) - println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}") - } - } +{% highlight scala %} +stream.foreachRDD { rdd => + val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges + rdd.foreachPartition { iter => + val o: OffsetRange = offsetRanges(TaskContext.get.partitionId) + println(s"${o.topic} ${o.partition} ${o.fromOffset} ${o.untilOffset}") + } +} +{% endhighlight %}
- stream.foreachRDD(new VoidFunction>>() { - @Override - public void call(JavaRDD> rdd) { - final OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges(); - rdd.foreachPartition(new VoidFunction>>() { - @Override - public void call(Iterator> consumerRecords) { - OffsetRange o = offsetRanges[TaskContext.get().partitionId()]; - System.out.println( - o.topic() + " " + o.partition() + " " + o.fromOffset() + " " + o.untilOffset()); - } - }); - } - }); +{% highlight java %} +stream.foreachRDD(new VoidFunction>>() { + @Override + public void call(JavaRDD> rdd) { + final OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges(); + rdd.foreachPartition(new VoidFunction>>() { + @Override + public void call(Iterator> consumerRecords) { + OffsetRange o = offsetRanges[TaskContext.get().partitionId()]; + System.out.println( + o.topic() + " " + o.partition() + " " + o.fromOffset() + " " + o.untilOffset()); + } + }); + } +}); +{% endhighlight %}
@@ -183,25 +193,28 @@ Kafka has an offset commit API that stores offsets in a special Kafka topic. By
- stream.foreachRDD { rdd => - val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges - - // some time later, after outputs have completed - stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges) - } - +{% highlight scala %} +stream.foreachRDD { rdd => + val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges + + // some time later, after outputs have completed + stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges) +} +{% endhighlight %} As with HasOffsetRanges, the cast to CanCommitOffsets will only succeed if called on the result of createDirectStream, not after transformations. The commitAsync call is threadsafe, but must occur after outputs if you want meaningful semantics.
- stream.foreachRDD(new VoidFunction>>() { - @Override - public void call(JavaRDD> rdd) { - OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges(); - - // some time later, after outputs have completed - ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges); - } - }); +{% highlight java %} +stream.foreachRDD(new VoidFunction>>() { + @Override + public void call(JavaRDD> rdd) { + OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges(); + + // some time later, after outputs have completed + ((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges); + } +}); +{% endhighlight %}
@@ -210,64 +223,68 @@ For data stores that support transactions, saving offsets in the same transactio
- // The details depend on your data store, but the general idea looks like this +{% highlight scala %} +// The details depend on your data store, but the general idea looks like this - // begin from the the offsets committed to the database - val fromOffsets = selectOffsetsFromYourDatabase.map { resultSet => - new TopicPartition(resultSet.string("topic"), resultSet.int("partition")) -> resultSet.long("offset") - }.toMap +// begin from the the offsets committed to the database +val fromOffsets = selectOffsetsFromYourDatabase.map { resultSet => + new TopicPartition(resultSet.string("topic"), resultSet.int("partition")) -> resultSet.long("offset") +}.toMap - val stream = KafkaUtils.createDirectStream[String, String]( - streamingContext, - PreferConsistent, - Assign[String, String](fromOffsets.keys.toList, kafkaParams, fromOffsets) - ) +val stream = KafkaUtils.createDirectStream[String, String]( + streamingContext, + PreferConsistent, + Assign[String, String](fromOffsets.keys.toList, kafkaParams, fromOffsets) +) - stream.foreachRDD { rdd => - val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges +stream.foreachRDD { rdd => + val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges - val results = yourCalculation(rdd) + val results = yourCalculation(rdd) - // begin your transaction + // begin your transaction - // update results - // update offsets where the end of existing offsets matches the beginning of this batch of offsets - // assert that offsets were updated correctly + // update results + // update offsets where the end of existing offsets matches the beginning of this batch of offsets + // assert that offsets were updated correctly - // end your transaction - } + // end your transaction +} +{% endhighlight %}
- // The details depend on your data store, but the general idea looks like this - - // begin from the the offsets committed to the database - Map fromOffsets = new HashMap<>(); - for (resultSet : selectOffsetsFromYourDatabase) - fromOffsets.put(new TopicPartition(resultSet.string("topic"), resultSet.int("partition")), resultSet.long("offset")); - } - - JavaInputDStream> stream = KafkaUtils.createDirectStream( - streamingContext, - LocationStrategies.PreferConsistent(), - ConsumerStrategies.Assign(fromOffsets.keySet(), kafkaParams, fromOffsets) - ); - - stream.foreachRDD(new VoidFunction>>() { - @Override - public void call(JavaRDD> rdd) { - OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges(); - - Object results = yourCalculation(rdd); - - // begin your transaction - - // update results - // update offsets where the end of existing offsets matches the beginning of this batch of offsets - // assert that offsets were updated correctly - - // end your transaction - } - }); +{% highlight java %} +// The details depend on your data store, but the general idea looks like this + +// begin from the the offsets committed to the database +Map fromOffsets = new HashMap<>(); +for (resultSet : selectOffsetsFromYourDatabase) + fromOffsets.put(new TopicPartition(resultSet.string("topic"), resultSet.int("partition")), resultSet.long("offset")); +} + +JavaInputDStream> stream = KafkaUtils.createDirectStream( + streamingContext, + LocationStrategies.PreferConsistent(), + ConsumerStrategies.Assign(fromOffsets.keySet(), kafkaParams, fromOffsets) +); + +stream.foreachRDD(new VoidFunction>>() { + @Override + public void call(JavaRDD> rdd) { + OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges(); + + Object results = yourCalculation(rdd); + + // begin your transaction + + // update results + // update offsets where the end of existing offsets matches the beginning of this batch of offsets + // assert that offsets were updated correctly + + // end your transaction + } +}); +{% endhighlight %}
@@ -277,25 +294,29 @@ The new Kafka consumer [supports SSL](http://kafka.apache.org/documentation.html
- val kafkaParams = Map[String, Object]( - // the usual params, make sure to change the port in bootstrap.servers if 9092 is not TLS - "security.protocol" -> "SSL", - "ssl.truststore.location" -> "/some-directory/kafka.client.truststore.jks", - "ssl.truststore.password" -> "test1234", - "ssl.keystore.location" -> "/some-directory/kafka.client.keystore.jks", - "ssl.keystore.password" -> "test1234", - "ssl.key.password" -> "test1234" - ) +{% highlight scala %} +val kafkaParams = Map[String, Object]( + // the usual params, make sure to change the port in bootstrap.servers if 9092 is not TLS + "security.protocol" -> "SSL", + "ssl.truststore.location" -> "/some-directory/kafka.client.truststore.jks", + "ssl.truststore.password" -> "test1234", + "ssl.keystore.location" -> "/some-directory/kafka.client.keystore.jks", + "ssl.keystore.password" -> "test1234", + "ssl.key.password" -> "test1234" +) +{% endhighlight %}
- Map kafkaParams = new HashMap(); - // the usual params, make sure to change the port in bootstrap.servers if 9092 is not TLS - kafkaParams.put("security.protocol", "SSL"); - kafkaParams.put("ssl.truststore.location", "/some-directory/kafka.client.truststore.jks"); - kafkaParams.put("ssl.truststore.password", "test1234"); - kafkaParams.put("ssl.keystore.location", "/some-directory/kafka.client.keystore.jks"); - kafkaParams.put("ssl.keystore.password", "test1234"); - kafkaParams.put("ssl.key.password", "test1234"); +{% highlight java %} +Map kafkaParams = new HashMap(); +// the usual params, make sure to change the port in bootstrap.servers if 9092 is not TLS +kafkaParams.put("security.protocol", "SSL"); +kafkaParams.put("ssl.truststore.location", "/some-directory/kafka.client.truststore.jks"); +kafkaParams.put("ssl.truststore.password", "test1234"); +kafkaParams.put("ssl.keystore.location", "/some-directory/kafka.client.keystore.jks"); +kafkaParams.put("ssl.keystore.password", "test1234"); +kafkaParams.put("ssl.key.password", "test1234"); +{% endhighlight %}
diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index a6c3b3a9024d8..c4c9fb3f7d3db 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -19,97 +19,103 @@ application. See the [Deploying](#deploying) subsection below.
+{% highlight scala %} - // Subscribe to 1 topic - val ds1 = spark - .readStream - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribe", "topic1") - .load() - ds1.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .as[(String, String)] +// Subscribe to 1 topic +val ds1 = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .load() +ds1.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] - // Subscribe to multiple topics - val ds2 = spark - .readStream - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribe", "topic1,topic2") - .load() - ds2.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .as[(String, String)] +// Subscribe to multiple topics +val ds2 = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1,topic2") + .load() +ds2.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] - // Subscribe to a pattern - val ds3 = spark - .readStream - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribePattern", "topic.*") - .load() - ds3.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - .as[(String, String)] +// Subscribe to a pattern +val ds3 = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribePattern", "topic.*") + .load() +ds3.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] +{% endhighlight %}
+{% highlight java %} - // Subscribe to 1 topic - Dataset ds1 = spark - .readStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribe", "topic1") - .load() - ds1.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +// Subscribe to 1 topic +Dataset ds1 = spark + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .load() +ds1.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - // Subscribe to multiple topics - Dataset ds2 = spark - .readStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribe", "topic1,topic2") - .load() - ds2.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +// Subscribe to multiple topics +Dataset ds2 = spark + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1,topic2") + .load() +ds2.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - // Subscribe to a pattern - Dataset ds3 = spark - .readStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribePattern", "topic.*") - .load() - ds3.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +// Subscribe to a pattern +Dataset ds3 = spark + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribePattern", "topic.*") + .load() +ds3.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +{% endhighlight %}
+{% highlight python %} - # Subscribe to 1 topic - ds1 = spark - .readStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribe", "topic1") - .load() - ds1.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +# Subscribe to 1 topic +ds1 = spark + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .load() +ds1.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - # Subscribe to multiple topics - ds2 = spark - .readStream - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribe", "topic1,topic2") - .load() - ds2.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +# Subscribe to multiple topics +ds2 = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1,topic2") + .load() +ds2.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") - # Subscribe to a pattern - ds3 = spark - .readStream() - .format("kafka") - .option("kafka.bootstrap.servers", "host1:port1,host2:port2") - .option("subscribePattern", "topic.*") - .load() - ds3.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +# Subscribe to a pattern +ds3 = spark + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribePattern", "topic.*") + .load() +ds3.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +{% endhighlight %}
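The read-side examples above stop at defining the Dataset. As a hedged usage sketch (not part of the patch), the fragment below shows one way to actually start a streaming query over `ds1` from the Scala example; the console sink and append output mode are arbitrary choices for illustration.

{% highlight scala %}
// Illustrative sketch only: start a query over the Kafka-sourced Dataset
// defined in the Scala example above and print the decoded rows.
val query = ds1
  .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)")
  .writeStream
  .outputMode("append")   // output mode chosen arbitrarily for the example
  .format("console")      // console sink, purely for demonstration
  .start()

query.awaitTermination()
{% endhighlight %}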
From 176afa5e8b207e28a16e1b22280ed05c10b7b486 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 2 Nov 2016 09:39:15 +0000 Subject: [PATCH 0007/1204] [SPARK-18076][CORE][SQL] Fix default Locale used in DateFormat, NumberFormat to Locale.US ## What changes were proposed in this pull request? Fix `Locale.US` for all usages of `DateFormat`, `NumberFormat` ## How was this patch tested? Existing tests. Author: Sean Owen Closes #15610 from srowen/SPARK-18076. (cherry picked from commit 9c8deef64efee20a0ddc9b612f90e77c80aede60) Signed-off-by: Sean Owen --- .../org/apache/spark/SparkHadoopWriter.scala | 8 +++---- .../apache/spark/deploy/SparkHadoopUtil.scala | 4 ++-- .../apache/spark/deploy/master/Master.scala | 5 ++-- .../apache/spark/deploy/worker/Worker.scala | 4 ++-- .../org/apache/spark/rdd/HadoopRDD.scala | 5 ++-- .../org/apache/spark/rdd/NewHadoopRDD.scala | 4 ++-- .../apache/spark/rdd/PairRDDFunctions.scala | 4 ++-- .../status/api/v1/JacksonMessageWriter.scala | 4 ++-- .../spark/status/api/v1/SimpleDateParam.scala | 6 ++--- .../scala/org/apache/spark/ui/UIUtils.scala | 3 ++- .../spark/util/logging/RollingPolicy.scala | 6 ++--- .../org/apache/spark/util/UtilsSuite.scala | 2 +- .../deploy/rest/mesos/MesosRestServer.scala | 11 ++++----- .../mllib/pmml/export/PMMLModelExport.scala | 4 ++-- .../expressions/datetimeExpressions.scala | 17 ++++++------- .../expressions/stringExpressions.scala | 2 +- .../spark/sql/catalyst/json/JSONOptions.scala | 6 +++-- .../sql/catalyst/util/DateTimeUtils.scala | 6 ++--- .../expressions/DateExpressionsSuite.scala | 24 +++++++++---------- .../catalyst/util/DateTimeUtilsSuite.scala | 6 ++--- .../datasources/csv/CSVInferSchema.scala | 4 ++-- .../datasources/csv/CSVOptions.scala | 5 ++-- .../sql/execution/metric/SQLMetrics.scala | 2 +- .../sql/execution/streaming/socket.scala | 4 ++-- .../apache/spark/sql/DateFunctionsSuite.scala | 11 +++++---- .../execution/datasources/csv/CSVSuite.scala | 9 +++---- .../datasources/csv/CSVTypeCastSuite.scala | 9 ++++--- .../hive/execution/InsertIntoHiveTable.scala | 9 +++---- .../spark/sql/hive/hiveWriterContainers.scala | 4 ++-- .../sql/sources/SimpleTextRelation.scala | 3 ++- .../apache/spark/streaming/ui/UIUtils.scala | 8 ++++--- 31 files changed, 103 insertions(+), 96 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index 6550d703bc860..7f75a393bf8ff 100644 --- a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -20,7 +20,7 @@ package org.apache.spark import java.io.IOException import java.text.NumberFormat import java.text.SimpleDateFormat -import java.util.Date +import java.util.{Date, Locale} import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path @@ -67,12 +67,12 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { def setup(jobid: Int, splitid: Int, attemptid: Int) { setIDs(jobid, splitid, attemptid) - HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss").format(now), + HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(now), jobid, splitID, attemptID, conf.value) } def open() { - val numfmt = NumberFormat.getInstance() + val numfmt = NumberFormat.getInstance(Locale.US) numfmt.setMinimumIntegerDigits(5) numfmt.setGroupingUsed(false) @@ -162,7 +162,7 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { private[spark] 
object SparkHadoopWriter { def createJobID(time: Date, id: Int): JobID = { - val formatter = new SimpleDateFormat("yyyyMMddHHmmss") + val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) val jobtrackerID = formatter.format(time) new JobID(jobtrackerID, id) } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 3f54ecc17ac33..23156072c3ebe 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -21,7 +21,7 @@ import java.io.IOException import java.lang.reflect.Method import java.security.PrivilegedExceptionAction import java.text.DateFormat -import java.util.{Arrays, Comparator, Date} +import java.util.{Arrays, Comparator, Date, Locale} import scala.collection.JavaConverters._ import scala.util.control.NonFatal @@ -357,7 +357,7 @@ class SparkHadoopUtil extends Logging { * @return a printable string value. */ private[spark] def tokenToString(token: Token[_ <: TokenIdentifier]): String = { - val df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT) + val df = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT, Locale.US) val buffer = new StringBuilder(128) buffer.append(token.toString) try { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 8c91aa15167c4..4618e6117a4fb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.master import java.text.SimpleDateFormat -import java.util.Date +import java.util.{Date, Locale} import java.util.concurrent.{ScheduledFuture, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} @@ -51,7 +51,8 @@ private[deploy] class Master( private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) - private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs + // For application IDs + private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) private val WORKER_TIMEOUT_MS = conf.getLong("spark.worker.timeout", 60) * 1000 private val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200) diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0bedd9a20a969..8b1c6bf2e5fd5 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -20,7 +20,7 @@ package org.apache.spark.deploy.worker import java.io.File import java.io.IOException import java.text.SimpleDateFormat -import java.util.{Date, UUID} +import java.util.{Date, Locale, UUID} import java.util.concurrent._ import java.util.concurrent.{Future => JFuture, ScheduledFuture => JScheduledFuture} @@ -68,7 +68,7 @@ private[deploy] class Worker( ThreadUtils.newDaemonSingleThreadExecutor("worker-cleanup-thread")) // For worker and executor IDs - private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") + private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) // Send a heartbeat every (heartbeat timeout) / 4 milliseconds private val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4 diff --git 
a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index e1cf3938de098..36a2f5c87e372 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -19,7 +19,7 @@ package org.apache.spark.rdd import java.io.IOException import java.text.SimpleDateFormat -import java.util.Date +import java.util.{Date, Locale} import scala.collection.immutable.Map import scala.reflect.ClassTag @@ -243,7 +243,8 @@ class HadoopRDD[K, V]( var reader: RecordReader[K, V] = null val inputFormat = getInputFormat(jobConf) - HadoopRDD.addLocalConfiguration(new SimpleDateFormat("yyyyMMddHHmmss").format(createTime), + HadoopRDD.addLocalConfiguration( + new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(createTime), context.stageId, theSplit.index, context.attemptNumber, jobConf) reader = inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL) diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index baf31fb658870..488e777fea371 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -19,7 +19,7 @@ package org.apache.spark.rdd import java.io.IOException import java.text.SimpleDateFormat -import java.util.Date +import java.util.{Date, Locale} import scala.reflect.ClassTag @@ -79,7 +79,7 @@ class NewHadoopRDD[K, V]( // private val serializableConf = new SerializableWritable(_conf) private val jobTrackerId: String = { - val formatter = new SimpleDateFormat("yyyyMMddHHmmss") + val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) formatter.format(new Date()) } diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 068f4ed8ad745..67baad1c51bca 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -19,7 +19,7 @@ package org.apache.spark.rdd import java.nio.ByteBuffer import java.text.SimpleDateFormat -import java.util.{Date, HashMap => JHashMap} +import java.util.{Date, HashMap => JHashMap, Locale} import scala.collection.{mutable, Map} import scala.collection.JavaConverters._ @@ -1079,7 +1079,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). 
val hadoopConf = conf val job = NewAPIHadoopJob.getInstance(hadoopConf) - val formatter = new SimpleDateFormat("yyyyMMddHHmmss") + val formatter = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US) val jobtrackerID = formatter.format(new Date()) val stageId = self.id val jobConfiguration = job.getConfiguration diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala index f6a9f9c5573db..76af33c1a18db 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala @@ -21,7 +21,7 @@ import java.lang.annotation.Annotation import java.lang.reflect.Type import java.nio.charset.StandardCharsets import java.text.SimpleDateFormat -import java.util.{Calendar, SimpleTimeZone} +import java.util.{Calendar, Locale, SimpleTimeZone} import javax.ws.rs.Produces import javax.ws.rs.core.{MediaType, MultivaluedMap} import javax.ws.rs.ext.{MessageBodyWriter, Provider} @@ -86,7 +86,7 @@ private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ private[spark] object JacksonMessageWriter { def makeISODateFormat: SimpleDateFormat = { - val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") + val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'", Locale.US) val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) iso8601.setCalendar(cal) iso8601 diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala index 0c71cd2382225..d8d5e8958b23c 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala @@ -17,7 +17,7 @@ package org.apache.spark.status.api.v1 import java.text.{ParseException, SimpleDateFormat} -import java.util.TimeZone +import java.util.{Locale, TimeZone} import javax.ws.rs.WebApplicationException import javax.ws.rs.core.Response import javax.ws.rs.core.Response.Status @@ -25,12 +25,12 @@ import javax.ws.rs.core.Response.Status private[v1] class SimpleDateParam(val originalValue: String) { val timestamp: Long = { - val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz") + val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz", Locale.US) try { format.parse(originalValue).getTime() } catch { case _: ParseException => - val gmtDay = new SimpleDateFormat("yyyy-MM-dd") + val gmtDay = new SimpleDateFormat("yyyy-MM-dd", Locale.US) gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) try { gmtDay.parse(originalValue).getTime() diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index c0d1a2220f62a..66b097aa8166d 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -36,7 +36,8 @@ private[spark] object UIUtils extends Logging { // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. 
private val dateFormat = new ThreadLocal[SimpleDateFormat]() { - override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + override def initialValue(): SimpleDateFormat = + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US) } def formatDate(date: Date): String = dateFormat.get.format(date) diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala index 5c4238c0381a1..1f263df57c857 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala @@ -18,7 +18,7 @@ package org.apache.spark.util.logging import java.text.SimpleDateFormat -import java.util.Calendar +import java.util.{Calendar, Locale} import org.apache.spark.internal.Logging @@ -59,7 +59,7 @@ private[spark] class TimeBasedRollingPolicy( } @volatile private var nextRolloverTime = calculateNextRolloverTime() - private val formatter = new SimpleDateFormat(rollingFileSuffixPattern) + private val formatter = new SimpleDateFormat(rollingFileSuffixPattern, Locale.US) /** Should rollover if current time has exceeded next rollover time */ def shouldRollover(bytesToBeWritten: Long): Boolean = { @@ -109,7 +109,7 @@ private[spark] class SizeBasedRollingPolicy( } @volatile private var bytesWrittenSinceRollover = 0L - val formatter = new SimpleDateFormat("--yyyy-MM-dd--HH-mm-ss--SSSS") + val formatter = new SimpleDateFormat("--yyyy-MM-dd--HH-mm-ss--SSSS", Locale.US) /** Should rollover if the next set of bytes is going to exceed the size limit */ def shouldRollover(bytesToBeWritten: Long): Boolean = { diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 15ef32f21d90c..feacfb7642f27 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -264,7 +264,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { val hour = minute * 60 def str: (Long) => String = Utils.msDurationToString(_) - val sep = new DecimalFormatSymbols(Locale.getDefault()).getDecimalSeparator() + val sep = new DecimalFormatSymbols(Locale.US).getDecimalSeparator assert(str(123) === "123 ms") assert(str(second) === "1" + sep + "0 s") diff --git a/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index 3b96488a129a9..ff60b88c6d533 100644 --- a/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.rest.mesos import java.io.File import java.text.SimpleDateFormat -import java.util.Date +import java.util.{Date, Locale} import java.util.concurrent.atomic.AtomicLong import javax.servlet.http.HttpServletResponse @@ -62,11 +62,10 @@ private[mesos] class MesosSubmitRequestServlet( private val DEFAULT_CORES = 1.0 private val nextDriverNumber = new AtomicLong(0) - private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs - private def newDriverId(submitDate: Date): String = { - "driver-%s-%04d".format( - createDateFormat.format(submitDate), nextDriverNumber.incrementAndGet()) - } + // For application IDs + private def createDateFormat = new 
SimpleDateFormat("yyyyMMddHHmmss", Locale.US) + private def newDriverId(submitDate: Date): String = + f"driver-${createDateFormat.format(submitDate)}-${nextDriverNumber.incrementAndGet()}%04d" /** * Build a driver description from the fields specified in the submit request. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala index 426bb818c9266..f5ca1c221d66b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala @@ -18,7 +18,7 @@ package org.apache.spark.mllib.pmml.export import java.text.SimpleDateFormat -import java.util.Date +import java.util.{Date, Locale} import scala.beans.BeanProperty @@ -34,7 +34,7 @@ private[mllib] trait PMMLModelExport { val version = getClass.getPackage.getImplementationVersion val app = new Application("Apache Spark MLlib").setVersion(version) val timestamp = new Timestamp() - .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) + .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(new Date())) val header = new Header() .setApplication(app) .setTimestamp(timestamp) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 7ab68a13e09cf..67c078ae5e264 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.text.SimpleDateFormat -import java.util.{Calendar, TimeZone} +import java.util.{Calendar, Locale, TimeZone} import scala.util.Try @@ -331,7 +331,7 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType) override protected def nullSafeEval(timestamp: Any, format: Any): Any = { - val sdf = new SimpleDateFormat(format.toString) + val sdf = new SimpleDateFormat(format.toString, Locale.US) UTF8String.fromString(sdf.format(new java.util.Date(timestamp.asInstanceOf[Long] / 1000))) } @@ -400,7 +400,7 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] private lazy val formatter: SimpleDateFormat = - Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null) + Try(new SimpleDateFormat(constFormat.toString, Locale.US)).getOrElse(null) override def eval(input: InternalRow): Any = { val t = left.eval(input) @@ -425,7 +425,7 @@ abstract class UnixTime extends BinaryExpression with ExpectsInputTypes { null } else { val formatString = f.asInstanceOf[UTF8String].toString - Try(new SimpleDateFormat(formatString).parse( + Try(new SimpleDateFormat(formatString, Locale.US).parse( t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null) } } @@ -520,7 +520,7 @@ case class FromUnixTime(sec: Expression, format: Expression) private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] private lazy val formatter: SimpleDateFormat = - Try(new SimpleDateFormat(constFormat.toString)).getOrElse(null) + Try(new SimpleDateFormat(constFormat.toString, Locale.US)).getOrElse(null) 
override def eval(input: InternalRow): Any = { val time = left.eval(input) @@ -539,9 +539,10 @@ case class FromUnixTime(sec: Expression, format: Expression) if (f == null) { null } else { - Try(UTF8String.fromString(new SimpleDateFormat( - f.asInstanceOf[UTF8String].toString).format(new java.util.Date( - time.asInstanceOf[Long] * 1000L)))).getOrElse(null) + Try( + UTF8String.fromString(new SimpleDateFormat(f.toString, Locale.US). + format(new java.util.Date(time.asInstanceOf[Long] * 1000L))) + ).getOrElse(null) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 1bcbb6cfc9246..25a5e3fd7da73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1415,7 +1415,7 @@ case class Sentences( val locale = if (languageStr != null && countryStr != null) { new Locale(languageStr.toString, countryStr.toString) } else { - Locale.getDefault + Locale.US } getSentences(string.asInstanceOf[UTF8String].toString, locale) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index aec18922ea6c8..c45970658cf07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.json +import java.util.Locale + import com.fasterxml.jackson.core.{JsonFactory, JsonParser} import org.apache.commons.lang3.time.FastDateFormat @@ -56,11 +58,11 @@ private[sql] class JSONOptions( // Uses `FastDateFormat` which can be direct replacement for `SimpleDateFormat` and thread-safe. val dateFormat: FastDateFormat = - FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd")) + FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd"), Locale.US) val timestampFormat: FastDateFormat = FastDateFormat.getInstance( - parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ")) + parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), Locale.US) // Parse mode flags if (!ParseModes.isValidMode(parseMode)) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 0b643a5b84268..235ca8d2633a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} -import java.util.{Calendar, TimeZone} +import java.util.{Calendar, Locale, TimeZone} import javax.xml.bind.DatatypeConverter import scala.annotation.tailrec @@ -79,14 +79,14 @@ object DateTimeUtils { // `SimpleDateFormat` is not thread-safe. val threadLocalTimestampFormat = new ThreadLocal[DateFormat] { override def initialValue(): SimpleDateFormat = { - new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) } } // `SimpleDateFormat` is not thread-safe. 
private val threadLocalDateFormat = new ThreadLocal[DateFormat] { override def initialValue(): SimpleDateFormat = { - new SimpleDateFormat("yyyy-MM-dd") + new SimpleDateFormat("yyyy-MM-dd", Locale.US) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 6118a34d29eaa..35cea25ba0b7d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.util.Calendar +import java.util.{Calendar, Locale} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -30,8 +30,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { import IntegralLiteralTestUtils._ - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") - val sdfDate = new SimpleDateFormat("yyyy-MM-dd") + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + val sdfDate = new SimpleDateFormat("yyyy-MM-dd", Locale.US) val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) val ts = new Timestamp(sdf.parse("2013-11-08 13:10:15").getTime) @@ -49,7 +49,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("DayOfYear") { - val sdfDay = new SimpleDateFormat("D") + val sdfDay = new SimpleDateFormat("D", Locale.US) (0 to 3).foreach { m => (0 to 5).foreach { i => val c = Calendar.getInstance() @@ -411,9 +411,9 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("from_unixtime") { - val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2) + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) checkEvaluation( FromUnixTime(Literal(0L), Literal("yyyy-MM-dd HH:mm:ss")), sdf1.format(new Timestamp(0))) checkEvaluation(FromUnixTime( @@ -430,11 +430,11 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("unix_timestamp") { - val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2) + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) val fmt3 = "yy-MM-dd" - val sdf3 = new SimpleDateFormat(fmt3) + val sdf3 = new SimpleDateFormat(fmt3, Locale.US) val date1 = Date.valueOf("2015-07-24") checkEvaluation( UnixTimestamp(Literal(sdf1.format(new Timestamp(0))), Literal("yyyy-MM-dd HH:mm:ss")), 0L) @@ -466,11 +466,11 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("to_unix_timestamp") { - val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2) + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) val fmt3 = "yy-MM-dd" - val sdf3 = new SimpleDateFormat(fmt3) + val sdf3 = new SimpleDateFormat(fmt3, Locale.US) val date1 = Date.valueOf("2015-07-24") checkEvaluation( ToUnixTimestamp(Literal(sdf1.format(new Timestamp(0))), Literal("yyyy-MM-dd HH:mm:ss")), 0L) diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 4f516d006458e..e0a9a0c3d5c00 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.util.{Calendar, TimeZone} +import java.util.{Calendar, Locale, TimeZone} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeUtils._ @@ -68,8 +68,8 @@ class DateTimeUtilsSuite extends SparkFunSuite { assert(d2.toString === d1.toString) } - val df1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") - val df2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z") + val df1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + val df2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss z", Locale.US) checkFromToJavaDate(new Date(100)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index 3ab775c909238..1981d8607c0c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -247,7 +247,7 @@ private[csv] object CSVTypeCast { case options.positiveInf => Float.PositiveInfinity case _ => Try(datum.toFloat) - .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).floatValue()) + .getOrElse(NumberFormat.getInstance(Locale.US).parse(datum).floatValue()) } case _: DoubleType => datum match { @@ -256,7 +256,7 @@ private[csv] object CSVTypeCast { case options.positiveInf => Double.PositiveInfinity case _ => Try(datum.toDouble) - .getOrElse(NumberFormat.getInstance(Locale.getDefault).parse(datum).doubleValue()) + .getOrElse(NumberFormat.getInstance(Locale.US).parse(datum).doubleValue()) } case _: BooleanType => datum.toBoolean case dt: DecimalType => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index 014614eb997a5..5903729c11fc5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.csv import java.nio.charset.StandardCharsets +import java.util.Locale import org.apache.commons.lang3.time.FastDateFormat @@ -104,11 +105,11 @@ private[csv] class CSVOptions(@transient private val parameters: Map[String, Str // Uses `FastDateFormat` which can be direct replacement for `SimpleDateFormat` and thread-safe. 
val dateFormat: FastDateFormat = - FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd")) + FastDateFormat.getInstance(parameters.getOrElse("dateFormat", "yyyy-MM-dd"), Locale.US) val timestampFormat: FastDateFormat = FastDateFormat.getInstance( - parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ")) + parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZZ"), Locale.US) val maxColumns = getInt("maxColumns", 20480) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala index 0cc1edd196bc8..dbc27d8b237f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala @@ -102,7 +102,7 @@ object SQLMetrics { */ def stringValue(metricsType: String, values: Seq[Long]): String = { if (metricsType == SUM_METRIC) { - val numberFormat = NumberFormat.getIntegerInstance(Locale.ENGLISH) + val numberFormat = NumberFormat.getIntegerInstance(Locale.US) numberFormat.format(values.sum) } else { val strFormat: Long => String = if (metricsType == SIZE_METRIC) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/socket.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/socket.scala index c662e7c6bc775..042977f870b8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/socket.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/socket.scala @@ -21,7 +21,7 @@ import java.io.{BufferedReader, InputStreamReader, IOException} import java.net.Socket import java.sql.Timestamp import java.text.SimpleDateFormat -import java.util.Calendar +import java.util.{Calendar, Locale} import javax.annotation.concurrent.GuardedBy import scala.collection.mutable.ListBuffer @@ -37,7 +37,7 @@ object TextSocketSource { val SCHEMA_REGULAR = StructType(StructField("value", StringType) :: Nil) val SCHEMA_TIMESTAMP = StructType(StructField("value", StringType) :: StructField("timestamp", TimestampType) :: Nil) - val DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index f7aa3b747ae5d..e05b2252ee346 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat +import java.util.Locale import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.functions._ @@ -55,8 +56,8 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { checkAnswer(sql("""SELECT CURRENT_TIMESTAMP() = NOW()"""), Row(true)) } - val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") - val sdfDate = new SimpleDateFormat("yyyy-MM-dd") + val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + val sdfDate = new SimpleDateFormat("yyyy-MM-dd", Locale.US) val d = new Date(sdf.parse("2015-04-08 13:10:15").getTime) val ts = new Timestamp(sdf.parse("2013-04-08 13:10:15").getTime) @@ -395,11 +396,11 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { } test("from_unixtime") { - val sdf1 = new 
SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2) + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) val fmt3 = "yy-MM-dd HH-mm-ss" - val sdf3 = new SimpleDateFormat(fmt3) + val sdf3 = new SimpleDateFormat(fmt3, Locale.US) val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b") checkAnswer( df.select(from_unixtime(col("a"))), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index f7c22c6c93f7a..8209b5bd7f9de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.nio.charset.UnsupportedCharsetException import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat +import java.util.Locale import org.apache.commons.lang3.time.FastDateFormat import org.apache.hadoop.io.SequenceFile.CompressionType @@ -487,7 +488,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .select("date") .collect() - val dateFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm") + val dateFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm", Locale.US) val expected = Seq(Seq(new Timestamp(dateFormat.parse("26/08/2015 18:00").getTime)), Seq(new Timestamp(dateFormat.parse("27/10/2014 18:30").getTime)), @@ -509,7 +510,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .select("date") .collect() - val dateFormat = new SimpleDateFormat("dd/MM/yyyy hh:mm") + val dateFormat = new SimpleDateFormat("dd/MM/yyyy hh:mm", Locale.US) val expected = Seq( new Date(dateFormat.parse("26/08/2015 18:00").getTime), new Date(dateFormat.parse("27/10/2014 18:30").getTime), @@ -728,7 +729,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .option("inferSchema", "false") .load(iso8601timestampsPath) - val iso8501 = FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSZZ") + val iso8501 = FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSZZ", Locale.US) val expectedTimestamps = timestamps.collect().map { r => // This should be ISO8601 formatted string. Row(iso8501.format(r.toSeq.head)) @@ -761,7 +762,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .option("inferSchema", "false") .load(iso8601datesPath) - val iso8501 = FastDateFormat.getInstance("yyyy-MM-dd") + val iso8501 = FastDateFormat.getInstance("yyyy-MM-dd", Locale.US) val expectedDates = dates.collect().map { r => // This should be ISO8601 formatted string. 
Row(iso8501.format(r.toSeq.head)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala index 51832a13cfe0b..c74406b9cbfbb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVTypeCastSuite.scala @@ -144,13 +144,12 @@ class CSVTypeCastSuite extends SparkFunSuite { DateTimeUtils.millisToDays(DateTimeUtils.stringToTime("2015-01-01").getTime)) } - test("Float and Double Types are cast correctly with Locale") { + test("Float and Double Types are cast without respect to platform default Locale") { val originalLocale = Locale.getDefault try { - val locale : Locale = new Locale("fr", "FR") - Locale.setDefault(locale) - assert(CSVTypeCast.castTo("1,00", FloatType) == 1.0) - assert(CSVTypeCast.castTo("1,00", DoubleType) == 1.0) + Locale.setDefault(new Locale("fr", "FR")) + assert(CSVTypeCast.castTo("1,00", FloatType) == 100.0) // Would parse as 1.0 in fr-FR + assert(CSVTypeCast.castTo("1,00", DoubleType) == 100.0) } finally { Locale.setDefault(originalLocale) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 2843100fb3b36..05164d774ccaf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -20,9 +20,7 @@ package org.apache.spark.sql.hive.execution import java.io.IOException import java.net.URI import java.text.SimpleDateFormat -import java.util.{Date, Random} - -import scala.collection.JavaConverters._ +import java.util.{Date, Locale, Random} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} @@ -60,9 +58,8 @@ case class InsertIntoHiveTable( private def executionId: String = { val rand: Random = new Random - val format: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS") - val executionId: String = "hive_" + format.format(new Date) + "_" + Math.abs(rand.nextLong) - return executionId + val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS", Locale.US) + "hive_" + format.format(new Date) + "_" + Math.abs(rand.nextLong) } private def getStagingDir(inputPath: Path, hadoopConf: Configuration): Path = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index ea88276bb96c0..e53c3e4d4833b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive import java.text.NumberFormat -import java.util.Date +import java.util.{Date, Locale} import scala.collection.JavaConverters._ @@ -95,7 +95,7 @@ private[hive] class SparkHiveWriterContainer( } protected def getOutputName: String = { - val numberFormat = NumberFormat.getInstance() + val numberFormat = NumberFormat.getInstance(Locale.US) numberFormat.setMinimumIntegerDigits(5) numberFormat.setGroupingUsed(false) val extension = Utilities.getFileExtension(conf.value, fileSinkConf.getCompressed, outputFormat) diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala index 64d0ecbeefc98..cecfd99098659 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.sources import java.text.NumberFormat +import java.util.Locale import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} @@ -141,7 +142,7 @@ class SimpleTextOutputWriter(path: String, context: TaskAttemptContext) class AppendingTextOutputFormat(path: String) extends TextOutputFormat[NullWritable, Text] { - val numberFormat = NumberFormat.getInstance() + val numberFormat = NumberFormat.getInstance(Locale.US) numberFormat.setMinimumIntegerDigits(5) numberFormat.setGroupingUsed(false) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala index 9b1c939e9329f..84ecf81abfbf1 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala @@ -18,7 +18,7 @@ package org.apache.spark.streaming.ui import java.text.SimpleDateFormat -import java.util.TimeZone +import java.util.{Locale, TimeZone} import java.util.concurrent.TimeUnit import scala.xml.Node @@ -80,11 +80,13 @@ private[streaming] object UIUtils { // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. private val batchTimeFormat = new ThreadLocal[SimpleDateFormat]() { - override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + override def initialValue(): SimpleDateFormat = + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US) } private val batchTimeFormatWithMilliseconds = new ThreadLocal[SimpleDateFormat]() { - override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS") + override def initialValue(): SimpleDateFormat = + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS", Locale.US) } /** From 41491e54080742f6e4a1e80a72cd9f46a9336e31 Mon Sep 17 00:00:00 2001 From: eyal farago Date: Wed, 2 Nov 2016 11:12:20 +0100 Subject: [PATCH 0008/1204] [SPARK-16839][SQL] Simplify Struct creation code path ## What changes were proposed in this pull request? Simplify struct creation, especially the aspect of `CleanupAliases` which missed some aliases when handling trees created by `CreateStruct`. This PR includes: 1. A failing test (create struct with nested aliases, some of the aliases survive `CleanupAliases`). 2. A fix that transforms `CreateStruct` into a `CreateNamedStruct` constructor, effectively eliminating `CreateStruct` from all expression trees. 3. A `NamePlaceHolder` used by `CreateStruct` when column names cannot be extracted from unresolved `NamedExpression`. 4. A new Analyzer rule that resolves `NamePlaceHolder` into a string literal once the `NamedExpression` is resolved. 5. `CleanupAliases` code was simplified as it no longer has to deal with `CreateStruct`'s top level columns. ## How was this patch tested? Running all tests-suits in package org.apache.spark.sql, especially including the analysis suite, making sure added test initially fails, after applying suggested fix rerun the entire analysis package successfully. Modified few tests that expected `CreateStruct` which is now transformed into `CreateNamedStruct`. 
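To make the effect of this rewrite concrete, here is a small, hypothetical Scala sketch (not part of the patch) of what a user could run after the change; the column names are made up, and the expected plan shape described in the comment is an approximation rather than exact analyzer output.

{% highlight scala %}
// Hypothetical illustration: after this change, struct(...) is resolved into a
// CreateNamedStruct expression, so the analyzed plan below is expected to show
// a named_struct(...) call rather than a CreateStruct node.
import org.apache.spark.sql.functions.{col, struct}

val df = spark.range(2).selectExpr("id AS a", "id * 2 AS b")
df.select(struct(col("a"), col("b")).as("s")).explain(true)
{% endhighlight %}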
Author: eyal farago Author: Herman van Hovell Author: eyal farago Author: Eyal Farago Author: Hyukjin Kwon Author: eyalfa Closes #15718 from hvanhovell/SPARK-16839-2. (cherry picked from commit f151bd1af8a05d4b6c901ebe6ac0b51a4a1a20df) Signed-off-by: Herman van Hovell --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +- .../sql/catalyst/analysis/Analyzer.scala | 53 ++--- .../catalyst/analysis/FunctionRegistry.scala | 2 +- .../sql/catalyst/expressions/Projection.scala | 2 - .../expressions/complexTypeCreator.scala | 212 ++++++------------ .../sql/catalyst/parser/AstBuilder.scala | 4 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 38 +++- .../expressions/ComplexTypeSuite.scala | 1 - .../scala/org/apache/spark/sql/Column.scala | 3 + .../command/AnalyzeColumnCommand.scala | 4 +- .../sql-tests/results/group-by.sql.out | 2 +- .../apache/spark/sql/hive/test/TestHive.scala | 20 +- .../resources/sqlgen/subquery_in_having_2.sql | 2 +- .../sql/catalyst/LogicalPlanToSQLSuite.scala | 12 +- 14 files changed, 169 insertions(+), 198 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 806019d7524ff..d7fe6b32822a7 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1222,16 +1222,16 @@ test_that("column functions", { # Test struct() df <- createDataFrame(list(list(1L, 2L, 3L), list(4L, 5L, 6L)), schema = c("a", "b", "c")) - result <- collect(select(df, struct("a", "c"))) + result <- collect(select(df, alias(struct("a", "c"), "d"))) expected <- data.frame(row.names = 1:2) - expected$"struct(a, c)" <- list(listToStruct(list(a = 1L, c = 3L)), - listToStruct(list(a = 4L, c = 6L))) + expected$"d" <- list(listToStruct(list(a = 1L, c = 3L)), + listToStruct(list(a = 4L, c = 6L))) expect_equal(result, expected) - result <- collect(select(df, struct(df$a, df$b))) + result <- collect(select(df, alias(struct(df$a, df$b), "d"))) expected <- data.frame(row.names = 1:2) - expected$"struct(a, b)" <- list(listToStruct(list(a = 1L, b = 2L)), - listToStruct(list(a = 4L, b = 5L))) + expected$"d" <- list(listToStruct(list(a = 1L, b = 2L)), + listToStruct(list(a = 4L, b = 5L))) expect_equal(result, expected) # Test encode(), decode() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f8f4799322b3b..5011f2fdbf9b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, _} import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.catalyst.trees.{TreeNodeRef} +import org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.types._ @@ -83,6 +83,7 @@ class Analyzer( ResolveTableValuedFunctions :: ResolveRelations :: ResolveReferences :: + ResolveCreateNamedStruct :: ResolveDeserializer :: ResolveNewInstance :: ResolveUpCast :: @@ -653,11 +654,12 @@ class Analyzer( case s: Star => s.expand(child, resolver) case o => o :: Nil }) - case c: CreateStruct if containsStar(c.children) => - c.copy(children = c.children.flatMap { - case s: Star => s.expand(child, resolver) - case o => o 
:: Nil - }) + case c: CreateNamedStruct if containsStar(c.valExprs) => + val newChildren = c.children.grouped(2).flatMap { + case Seq(k, s : Star) => CreateStruct(s.expand(child, resolver)).children + case kv => kv + } + c.copy(children = newChildren.toList ) case c: CreateArray if containsStar(c.children) => c.copy(children = c.children.flatMap { case s: Star => s.expand(child, resolver) @@ -1141,7 +1143,7 @@ class Analyzer( case In(e, Seq(l @ ListQuery(_, exprId))) if e.resolved => // Get the left hand side expressions. val expressions = e match { - case CreateStruct(exprs) => exprs + case cns : CreateNamedStruct => cns.valExprs case expr => Seq(expr) } resolveSubQuery(l, plans, expressions.size) { (rewrite, conditions) => @@ -2072,18 +2074,8 @@ object EliminateUnions extends Rule[LogicalPlan] { */ object CleanupAliases extends Rule[LogicalPlan] { private def trimAliases(e: Expression): Expression = { - var stop = false e.transformDown { - // CreateStruct is a special case, we need to retain its top level Aliases as they decide the - // name of StructField. We also need to stop transform down this expression, or the Aliases - // under CreateStruct will be mistakenly trimmed. - case c: CreateStruct if !stop => - stop = true - c.copy(children = c.children.map(trimNonTopLevelAliases)) - case c: CreateStructUnsafe if !stop => - stop = true - c.copy(children = c.children.map(trimNonTopLevelAliases)) - case Alias(child, _) if !stop => child + case Alias(child, _) => child } } @@ -2116,15 +2108,8 @@ object CleanupAliases extends Rule[LogicalPlan] { case a: AppendColumns => a case other => - var stop = false other transformExpressionsDown { - case c: CreateStruct if !stop => - stop = true - c.copy(children = c.children.map(trimNonTopLevelAliases)) - case c: CreateStructUnsafe if !stop => - stop = true - c.copy(children = c.children.map(trimNonTopLevelAliases)) - case Alias(child, _) if !stop => child + case Alias(child, _) => child } } } @@ -2217,3 +2202,19 @@ object TimeWindowing extends Rule[LogicalPlan] { } } } + +/** + * Resolve a [[CreateNamedStruct]] if it contains [[NamePlaceholder]]s. 
+ */ +object ResolveCreateNamedStruct extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressions { + case e: CreateNamedStruct if !e.resolved => + val children = e.children.grouped(2).flatMap { + case Seq(NamePlaceholder, e: NamedExpression) if e.resolved => + Seq(Literal(e.name), e) + case kv => + kv + } + CreateNamedStruct(children.toList) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3e836ca375e2e..b028d07fb8d0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -357,7 +357,7 @@ object FunctionRegistry { expression[MapValues]("map_values"), expression[Size]("size"), expression[SortArray]("sort_array"), - expression[CreateStruct]("struct"), + CreateStruct.registryEntry, // misc functions expression[AssertTrue]("assert_true"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala index a81fa1ce3adcc..03e054d098511 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala @@ -119,7 +119,6 @@ object UnsafeProjection { */ def create(exprs: Seq[Expression]): UnsafeProjection = { val unsafeExprs = exprs.map(_ transform { - case CreateStruct(children) => CreateStructUnsafe(children) case CreateNamedStruct(children) => CreateNamedStructUnsafe(children) }) GenerateUnsafeProjection.generate(unsafeExprs) @@ -145,7 +144,6 @@ object UnsafeProjection { subexpressionEliminationEnabled: Boolean): UnsafeProjection = { val e = exprs.map(BindReferences.bindReference(_, inputSchema)) .map(_ transform { - case CreateStruct(children) => CreateStructUnsafe(children) case CreateNamedStruct(children) => CreateNamedStructUnsafe(children) }) GenerateUnsafeProjection.generate(e, subexpressionEliminationEnabled) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 917aa0873130b..dbfb2996ec9d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -18,9 +18,11 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import org.apache.spark.sql.catalyst.analysis.Star import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData, TypeUtils} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, TypeUtils} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -172,101 +174,71 @@ case class CreateMap(children: Seq[Expression]) extends Expression { } /** - * Returns a Row containing the evaluation of all children expressions. 
+ * An expression representing a not yet available attribute name. This expression is unevaluable + * and as its name suggests it is a temporary place holder until we're able to determine the + * actual attribute name. */ -@ExpressionDescription( - usage = "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.") -case class CreateStruct(children: Seq[Expression]) extends Expression { - - override def foldable: Boolean = children.forall(_.foldable) - - override lazy val dataType: StructType = { - val fields = children.zipWithIndex.map { case (child, idx) => - child match { - case ne: NamedExpression => - StructField(ne.name, ne.dataType, ne.nullable, ne.metadata) - case _ => - StructField(s"col${idx + 1}", child.dataType, child.nullable, Metadata.empty) - } - } - StructType(fields) - } - +case object NamePlaceholder extends LeafExpression with Unevaluable { + override lazy val resolved: Boolean = false + override def foldable: Boolean = false override def nullable: Boolean = false + override def dataType: DataType = StringType + override def prettyName: String = "NamePlaceholder" + override def toString: String = prettyName +} - override def eval(input: InternalRow): Any = { - InternalRow(children.map(_.eval(input)): _*) +/** + * Returns a Row containing the evaluation of all children expressions. + */ +object CreateStruct extends FunctionBuilder { + def apply(children: Seq[Expression]): CreateNamedStruct = { + CreateNamedStruct(children.zipWithIndex.flatMap { + case (e: NamedExpression, _) if e.resolved => Seq(Literal(e.name), e) + case (e: NamedExpression, _) => Seq(NamePlaceholder, e) + case (e, index) => Seq(Literal(s"col${index + 1}"), e) + }) } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val rowClass = classOf[GenericInternalRow].getName - val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"this.$values = null;") - - ev.copy(code = s""" - boolean ${ev.isNull} = false; - this.$values = new Object[${children.size}];""" + - ctx.splitExpressions( - ctx.INPUT_ROW, - children.zipWithIndex.map { case (e, i) => - val eval = e.genCode(ctx) - eval.code + s""" - if (${eval.isNull}) { - $values[$i] = null; - } else { - $values[$i] = ${eval.value}; - }""" - }) + - s""" - final InternalRow ${ev.value} = new $rowClass($values); - this.$values = null; - """) + /** + * Entry to use in the function registry. + */ + val registryEntry: (String, (ExpressionInfo, FunctionBuilder)) = { + val info: ExpressionInfo = new ExpressionInfo( + "org.apache.spark.sql.catalyst.expressions.NamedStruct", + null, + "struct", + "_FUNC_(col1, col2, col3, ...) - Creates a struct with the given field values.", + "") + ("struct", (info, this)) } - - override def prettyName: String = "struct" } - /** - * Creates a struct with the given field names and values - * - * @param children Seq(name1, val1, name2, val2, ...) + * Common base class for both [[CreateNamedStruct]] and [[CreateNamedStructUnsafe]]. */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(name1, val1, name2, val2, ...) 
- Creates a struct with the given field names and values.") -// scalastyle:on line.size.limit -case class CreateNamedStruct(children: Seq[Expression]) extends Expression { +trait CreateNamedStructLike extends Expression { + lazy val (nameExprs, valExprs) = children.grouped(2).map { + case Seq(name, value) => (name, value) + }.toList.unzip - /** - * Returns Aliased [[Expression]]s that could be used to construct a flattened version of this - * StructType. - */ - def flatten: Seq[NamedExpression] = valExprs.zip(names).map { - case (v, n) => Alias(v, n.toString)() - } + lazy val names = nameExprs.map(_.eval(EmptyRow)) - private lazy val (nameExprs, valExprs) = - children.grouped(2).map { case Seq(name, value) => (name, value) }.toList.unzip + override def nullable: Boolean = false - private lazy val names = nameExprs.map(_.eval(EmptyRow)) + override def foldable: Boolean = valExprs.forall(_.foldable) override lazy val dataType: StructType = { val fields = names.zip(valExprs).map { - case (name, valExpr: NamedExpression) => - StructField(name.asInstanceOf[UTF8String].toString, - valExpr.dataType, valExpr.nullable, valExpr.metadata) - case (name, valExpr) => - StructField(name.asInstanceOf[UTF8String].toString, - valExpr.dataType, valExpr.nullable, Metadata.empty) + case (name, expr) => + val metadata = expr match { + case ne: NamedExpression => ne.metadata + case _ => Metadata.empty + } + StructField(name.toString, expr.dataType, expr.nullable, metadata) } StructType(fields) } - override def foldable: Boolean = valExprs.forall(_.foldable) - - override def nullable: Boolean = false - override def checkInputDataTypes(): TypeCheckResult = { if (children.size % 2 != 0) { TypeCheckResult.TypeCheckFailure(s"$prettyName expects an even number of arguments.") @@ -274,8 +246,8 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression { val invalidNames = nameExprs.filterNot(e => e.foldable && e.dataType == StringType) if (invalidNames.nonEmpty) { TypeCheckResult.TypeCheckFailure( - s"Only foldable StringType expressions are allowed to appear at odd position , got :" + - s" ${invalidNames.mkString(",")}") + "Only foldable StringType expressions are allowed to appear at odd position, got:" + + s" ${invalidNames.mkString(",")}") } else if (!names.contains(null)) { TypeCheckResult.TypeCheckSuccess } else { @@ -284,9 +256,29 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression { } } + /** + * Returns Aliased [[Expression]]s that could be used to construct a flattened version of this + * StructType. + */ + def flatten: Seq[NamedExpression] = valExprs.zip(names).map { + case (v, n) => Alias(v, n.toString)() + } + override def eval(input: InternalRow): Any = { InternalRow(valExprs.map(_.eval(input)): _*) } +} + +/** + * Creates a struct with the given field names and values + * + * @param children Seq(name1, val1, name2, val2, ...) + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values.") +// scalastyle:on line.size.limit +case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStructLike { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericInternalRow].getName @@ -316,44 +308,6 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression { override def prettyName: String = "named_struct" } -/** - * Returns a Row containing the evaluation of all children expressions. 
This is a variant that - * returns UnsafeRow directly. The unsafe projection operator replaces [[CreateStruct]] with - * this expression automatically at runtime. - */ -case class CreateStructUnsafe(children: Seq[Expression]) extends Expression { - - override def foldable: Boolean = children.forall(_.foldable) - - override lazy val resolved: Boolean = childrenResolved - - override lazy val dataType: StructType = { - val fields = children.zipWithIndex.map { case (child, idx) => - child match { - case ne: NamedExpression => - StructField(ne.name, ne.dataType, ne.nullable, ne.metadata) - case _ => - StructField(s"col${idx + 1}", child.dataType, child.nullable, Metadata.empty) - } - } - StructType(fields) - } - - override def nullable: Boolean = false - - override def eval(input: InternalRow): Any = { - InternalRow(children.map(_.eval(input)): _*) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val eval = GenerateUnsafeProjection.createCode(ctx, children) - ExprCode(code = eval.code, isNull = eval.isNull, value = eval.value) - } - - override def prettyName: String = "struct_unsafe" -} - - /** * Creates a struct with the given field names and values. This is a variant that returns * UnsafeRow directly. The unsafe projection operator replaces [[CreateStruct]] with @@ -361,31 +315,7 @@ case class CreateStructUnsafe(children: Seq[Expression]) extends Expression { * * @param children Seq(name1, val1, name2, val2, ...) */ -case class CreateNamedStructUnsafe(children: Seq[Expression]) extends Expression { - - private lazy val (nameExprs, valExprs) = - children.grouped(2).map { case Seq(name, value) => (name, value) }.toList.unzip - - private lazy val names = nameExprs.map(_.eval(EmptyRow).toString) - - override lazy val dataType: StructType = { - val fields = names.zip(valExprs).map { - case (name, valExpr: NamedExpression) => - StructField(name, valExpr.dataType, valExpr.nullable, valExpr.metadata) - case (name, valExpr) => - StructField(name, valExpr.dataType, valExpr.nullable, Metadata.empty) - } - StructType(fields) - } - - override def foldable: Boolean = valExprs.forall(_.foldable) - - override def nullable: Boolean = false - - override def eval(input: InternalRow): Any = { - InternalRow(valExprs.map(_.eval(input)): _*) - } - +case class CreateNamedStructUnsafe(children: Seq[Expression]) extends CreateNamedStructLike { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = GenerateUnsafeProjection.createCode(ctx, valExprs) ExprCode(code = eval.code, isNull = eval.isNull, value = eval.value) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ac1577b3abb4d..4b151c81d8f8b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -688,8 +688,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { // inline table comes in two styles: // style 1: values (1), (2), (3) -- multiple columns are supported // style 2: values 1, 2, 3 -- only a single column is supported here - case CreateStruct(children) => children // style 1 - case child => Seq(child) // style 2 + case struct: CreateNamedStruct => struct.valExprs // style 1 + case child => Seq(child) // style 2 } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 590774c043040..817de48de2798 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.analysis +import org.scalatest.ShouldMatchers + import org.apache.spark.sql.catalyst.{SimpleCatalystConf, TableIdentifier} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -25,7 +27,8 @@ import org.apache.spark.sql.catalyst.plans.{Cross, Inner} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types._ -class AnalysisSuite extends AnalysisTest { + +class AnalysisSuite extends AnalysisTest with ShouldMatchers { import org.apache.spark.sql.catalyst.analysis.TestRelations._ test("union project *") { @@ -218,9 +221,36 @@ class AnalysisSuite extends AnalysisTest { // CreateStruct is a special case that we should not trim Alias for it. plan = testRelation.select(CreateStruct(Seq(a, (a + 1).as("a+1"))).as("col")) - checkAnalysis(plan, plan) - plan = testRelation.select(CreateStructUnsafe(Seq(a, (a + 1).as("a+1"))).as("col")) - checkAnalysis(plan, plan) + expected = testRelation.select(CreateNamedStruct(Seq( + Literal(a.name), a, + Literal("a+1"), (a + 1))).as("col")) + checkAnalysis(plan, expected) + } + + test("Analysis may leave unnecassary aliases") { + val att1 = testRelation.output.head + var plan = testRelation.select( + CreateStruct(Seq(att1, ((att1.as("aa")) + 1).as("a_plus_1"))).as("col"), + att1 + ) + val prevPlan = getAnalyzer(true).execute(plan) + plan = prevPlan.select(CreateArray(Seq( + CreateStruct(Seq(att1, (att1 + 1).as("a_plus_1"))).as("col1"), + /** alias should be eliminated by [[CleanupAliases]] */ + "col".attr.as("col2") + )).as("arr")) + plan = getAnalyzer(true).execute(plan) + + val expectedPlan = prevPlan.select( + CreateArray(Seq( + CreateNamedStruct(Seq( + Literal(att1.name), att1, + Literal("a_plus_1"), (att1 + 1))), + 'col.struct(prevPlan.output(0).dataType.asInstanceOf[StructType]).notNull + )).as("arr") + ) + + checkAnalysis(plan, expectedPlan) } test("SPARK-10534: resolve attribute references in order by clause") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 0c307b2b8576b..c21c6de32c0ba 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -243,7 +243,6 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { val b = AttributeReference("b", IntegerType)() checkMetadata(CreateStruct(Seq(a, b))) checkMetadata(CreateNamedStruct(Seq("a", a, "b", b))) - checkMetadata(CreateStructUnsafe(Seq(a, b))) checkMetadata(CreateNamedStructUnsafe(Seq("a", a, "b", b))) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 249408e0fbce4..7a131b30eafd7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -186,6 +186,9 @@ class Column(val expr: Expression) extends Logging { case a: AggregateExpression if 
a.aggregateFunction.isInstanceOf[TypedAggregateExpression] => UnresolvedAlias(a, Some(Column.generateAlias)) + // Wait until the struct is resolved. This will generate a nicer looking alias. + case struct: CreateNamedStructLike => UnresolvedAlias(struct) + case expr: Expression => Alias(expr, usePrettyExpression(expr).sql)() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index f873f34a845ef..6141fab4aff0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -137,7 +137,7 @@ object ColumnStatStruct { private def numTrues(e: Expression): Expression = Sum(If(e, one, zero)) private def numFalses(e: Expression): Expression = Sum(If(Not(e), one, zero)) - private def getStruct(exprs: Seq[Expression]): CreateStruct = { + private def getStruct(exprs: Seq[Expression]): CreateNamedStruct = { CreateStruct(exprs.map { expr: Expression => expr.transformUp { case af: AggregateFunction => af.toAggregateExpression() @@ -168,7 +168,7 @@ object ColumnStatStruct { } } - def apply(attr: Attribute, relativeSD: Double): CreateStruct = attr.dataType match { + def apply(attr: Attribute, relativeSD: Double): CreateNamedStruct = attr.dataType match { // Use aggregate functions to compute statistics we need. case _: NumericType | TimestampType | DateType => getStruct(numericColumnStat(attr, relativeSD)) case StringType => getStruct(stringColumnStat(attr, relativeSD)) diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index a91f04e098b18..af6c930d64b76 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -87,7 +87,7 @@ struct -- !query 9 SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1 -- !query 9 schema -struct> +struct> -- !query 9 output diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 6eb571b91ffab..90000445dffb2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -190,6 +190,12 @@ private[hive] class TestHiveSparkSession( new File(Thread.currentThread().getContextClassLoader.getResource(path).getFile) } + private def quoteHiveFile(path : String) = if (Utils.isWindows) { + getHiveFile(path).getPath.replace('\\', '/') + } else { + getHiveFile(path).getPath + } + def getWarehousePath(): String = { val tempConf = new SQLConf sc.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) } @@ -225,16 +231,16 @@ private[hive] class TestHiveSparkSession( val hiveQTestUtilTables: Seq[TestTable] = Seq( TestTable("src", "CREATE TABLE src (key INT, value STRING)".cmd, - s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}' INTO TABLE src".cmd), + s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' INTO TABLE src".cmd), TestTable("src1", "CREATE TABLE src1 (key INT, value STRING)".cmd, - s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv3.txt")}' INTO TABLE src1".cmd), + s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv3.txt")}' INTO TABLE src1".cmd), TestTable("srcpart", () 
=> { sql( "CREATE TABLE srcpart (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)") for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { sql( - s"""LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}' + s"""LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' |OVERWRITE INTO TABLE srcpart PARTITION (ds='$ds',hr='$hr') """.stripMargin) } @@ -244,7 +250,7 @@ private[hive] class TestHiveSparkSession( "CREATE TABLE srcpart1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr INT)") for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- 11 to 12) { sql( - s"""LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}' + s"""LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' |OVERWRITE INTO TABLE srcpart1 PARTITION (ds='$ds',hr='$hr') """.stripMargin) } @@ -269,7 +275,7 @@ private[hive] class TestHiveSparkSession( sql( s""" - |LOAD DATA LOCAL INPATH '${getHiveFile("data/files/complex.seq")}' + |LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/complex.seq")}' |INTO TABLE src_thrift """.stripMargin) }), @@ -308,7 +314,7 @@ private[hive] class TestHiveSparkSession( |) """.stripMargin.cmd, s""" - |LOAD DATA LOCAL INPATH '${getHiveFile("data/files/episodes.avro")}' + |LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/episodes.avro")}' |INTO TABLE episodes """.stripMargin.cmd ), @@ -379,7 +385,7 @@ private[hive] class TestHiveSparkSession( TestTable("src_json", s"""CREATE TABLE src_json (json STRING) STORED AS TEXTFILE """.stripMargin.cmd, - s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/json.txt")}' INTO TABLE src_json".cmd) + s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/json.txt")}' INTO TABLE src_json".cmd) ) hiveQTestUtilTables.foreach(registerTestTable) diff --git a/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql b/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql index de0116a4dcbaf..cdda29af50e37 100644 --- a/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql +++ b/sql/hive/src/test/resources/sqlgen/subquery_in_having_2.sql @@ -7,4 +7,4 @@ having b.key in (select a.key where a.value > 'val_9' and a.value = min(b.value)) order by b.key -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (struct(`gen_attr_0`, `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM (SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > 'val_9')) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC NULLS FIRST) AS b +SELECT `gen_attr_0` AS `key`, `gen_attr_1` AS `min(value)` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `gen_attr_0`, min(`gen_attr_5`) AS `gen_attr_1`, min(`gen_attr_5`) AS `gen_attr_4` FROM (SELECT `key` AS `gen_attr_0`, `value` AS `gen_attr_5` FROM `default`.`src`) AS gen_subquery_0 GROUP BY `gen_attr_0` HAVING (named_struct('gen_attr_0', `gen_attr_0`, 'gen_attr_4', `gen_attr_4`) IN (SELECT `gen_attr_6` AS `_c0`, `gen_attr_7` AS `_c1` FROM (SELECT `gen_attr_2` AS `gen_attr_6`, `gen_attr_3` AS `gen_attr_7` FROM 
(SELECT `gen_attr_2`, `gen_attr_3` FROM (SELECT `key` AS `gen_attr_2`, `value` AS `gen_attr_3` FROM `default`.`src`) AS gen_subquery_3 WHERE (`gen_attr_3` > 'val_9')) AS gen_subquery_2) AS gen_subquery_4))) AS gen_subquery_1 ORDER BY `gen_attr_0` ASC NULLS FIRST) AS b diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala index c7f10e569fa4d..12d18dc87ceb4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst import java.nio.charset.StandardCharsets import java.nio.file.{Files, NoSuchFileException, Paths} +import scala.io.Source import scala.util.control.NonFatal import org.apache.spark.sql.Column @@ -109,12 +110,15 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { Files.write(path, answerText.getBytes(StandardCharsets.UTF_8)) } else { val goldenFileName = s"sqlgen/$answerFile.sql" - val resourceFile = getClass.getClassLoader.getResource(goldenFileName) - if (resourceFile == null) { + val resourceStream = getClass.getClassLoader.getResourceAsStream(goldenFileName) + if (resourceStream == null) { throw new NoSuchFileException(goldenFileName) } - val path = resourceFile.getPath - val answerText = new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8) + val answerText = try { + Source.fromInputStream(resourceStream).mkString + } finally { + resourceStream.close + } val sqls = answerText.split(separator) assert(sqls.length == 2, "Golden sql files should have a separator.") val expectedSQL = sqls(1).trim() From 9be069125f7e94df9d862f307b87965baf9416e3 Mon Sep 17 00:00:00 2001 From: Takeshi YAMAMURO Date: Wed, 2 Nov 2016 11:29:26 -0700 Subject: [PATCH 0009/1204] [SPARK-17683][SQL] Support ArrayType in Literal.apply ## What changes were proposed in this pull request? This pr is to add pattern-matching entries for array data in `Literal.apply`. ## How was this patch tested? Added tests in `LiteralExpressionSuite`. Author: Takeshi YAMAMURO Closes #15257 from maropu/SPARK-17683. 
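
A minimal sketch of the new behavior (illustrative only, assuming a build that includes this change): arrays of supported element types can now be passed to `Literal.apply` directly, and the element type is inferred from the array's component class.

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType}

// Primitive and String arrays map to ArrayType with the inferred element type.
val intArrayLit = Literal(Array(1, 2, 3))
assert(intArrayLit.dataType == ArrayType(IntegerType))

val strArrayLit = Literal(Array("a", "b", "c"))
assert(strArrayLit.dataType == ArrayType(StringType))

// Nested arrays map to nested ArrayTypes.
val nestedLit = Literal(Array(Array(1, 2), Array(3)))
assert(nestedLit.dataType == ArrayType(ArrayType(IntegerType)))
```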
(cherry picked from commit 4af0ce2d96de3397c9bc05684cad290a52486577) Signed-off-by: Reynold Xin --- .../sql/catalyst/expressions/literals.scala | 57 ++++++++++++++++++- .../expressions/LiteralExpressionSuite.scala | 27 ++++++++- 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index a597a17aadd99..1985e68c94e2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -17,14 +17,25 @@ package org.apache.spark.sql.catalyst.expressions +import java.lang.{Boolean => JavaBoolean} +import java.lang.{Byte => JavaByte} +import java.lang.{Double => JavaDouble} +import java.lang.{Float => JavaFloat} +import java.lang.{Integer => JavaInteger} +import java.lang.{Long => JavaLong} +import java.lang.{Short => JavaShort} +import java.math.{BigDecimal => JavaBigDecimal} import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} import java.util import java.util.Objects import javax.xml.bind.DatatypeConverter +import scala.math.{BigDecimal, BigInt} + import org.json4s.JsonAST._ +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -46,12 +57,17 @@ object Literal { case s: String => Literal(UTF8String.fromString(s), StringType) case b: Boolean => Literal(b, BooleanType) case d: BigDecimal => Literal(Decimal(d), DecimalType(Math.max(d.precision, d.scale), d.scale)) - case d: java.math.BigDecimal => + case d: JavaBigDecimal => Literal(Decimal(d), DecimalType(Math.max(d.precision, d.scale), d.scale())) case d: Decimal => Literal(d, DecimalType(Math.max(d.precision, d.scale), d.scale)) case t: Timestamp => Literal(DateTimeUtils.fromJavaTimestamp(t), TimestampType) case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType) case a: Array[Byte] => Literal(a, BinaryType) + case a: Array[_] => + val elementType = componentTypeToDataType(a.getClass.getComponentType()) + val dataType = ArrayType(elementType) + val convert = CatalystTypeConverters.createToCatalystConverter(dataType) + Literal(convert(a), dataType) case i: CalendarInterval => Literal(i, CalendarIntervalType) case null => Literal(null, NullType) case v: Literal => v @@ -59,6 +75,45 @@ object Literal { throw new RuntimeException("Unsupported literal type " + v.getClass + " " + v) } + /** + * Returns the Spark SQL DataType for a given class object. Since this type needs to be resolved + * in runtime, we use match-case idioms for class objects here. However, there are similar + * functions in other files (e.g., HiveInspectors), so these functions need to merged into one. 
+ */ + private[this] def componentTypeToDataType(clz: Class[_]): DataType = clz match { + // primitive types + case JavaShort.TYPE => ShortType + case JavaInteger.TYPE => IntegerType + case JavaLong.TYPE => LongType + case JavaDouble.TYPE => DoubleType + case JavaByte.TYPE => ByteType + case JavaFloat.TYPE => FloatType + case JavaBoolean.TYPE => BooleanType + + // java classes + case _ if clz == classOf[Date] => DateType + case _ if clz == classOf[Timestamp] => TimestampType + case _ if clz == classOf[JavaBigDecimal] => DecimalType.SYSTEM_DEFAULT + case _ if clz == classOf[Array[Byte]] => BinaryType + case _ if clz == classOf[JavaShort] => ShortType + case _ if clz == classOf[JavaInteger] => IntegerType + case _ if clz == classOf[JavaLong] => LongType + case _ if clz == classOf[JavaDouble] => DoubleType + case _ if clz == classOf[JavaByte] => ByteType + case _ if clz == classOf[JavaFloat] => FloatType + case _ if clz == classOf[JavaBoolean] => BooleanType + + // other scala classes + case _ if clz == classOf[String] => StringType + case _ if clz == classOf[BigInt] => DecimalType.SYSTEM_DEFAULT + case _ if clz == classOf[BigDecimal] => DecimalType.SYSTEM_DEFAULT + case _ if clz == classOf[CalendarInterval] => CalendarIntervalType + + case _ if clz.isArray => ArrayType(componentTypeToDataType(clz.getComponentType)) + + case _ => throw new AnalysisException(s"Unsupported component type $clz in arrays") + } + /** * Constructs a [[Literal]] of [[ObjectType]], for example when you need to pass an object * into code generation. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 450222d8cbba3..4af4da8a9f0c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -43,6 +44,7 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create(null, TimestampType), null) checkEvaluation(Literal.create(null, CalendarIntervalType), null) checkEvaluation(Literal.create(null, ArrayType(ByteType, true)), null) + checkEvaluation(Literal.create(null, ArrayType(StringType, true)), null) checkEvaluation(Literal.create(null, MapType(StringType, IntegerType)), null) checkEvaluation(Literal.create(null, StructType(Seq.empty)), null) } @@ -122,5 +124,28 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { } } - // TODO(davies): add tests for ArrayType, MapType and StructType + test("array") { + def checkArrayLiteral(a: Array[_], elementType: DataType): Unit = { + val toCatalyst = (a: Array[_], elementType: DataType) => { + CatalystTypeConverters.createToCatalystConverter(ArrayType(elementType))(a) + } + checkEvaluation(Literal(a), toCatalyst(a, elementType)) + } + checkArrayLiteral(Array(1, 2, 3), IntegerType) + checkArrayLiteral(Array("a", "b", "c"), StringType) + checkArrayLiteral(Array(1.0, 4.0), DoubleType) + checkArrayLiteral(Array(CalendarInterval.MICROS_PER_DAY, 
CalendarInterval.MICROS_PER_HOUR), + CalendarIntervalType) + } + + test("unsupported types (map and struct) in literals") { + def checkUnsupportedTypeInLiteral(v: Any): Unit = { + val errMsgMap = intercept[RuntimeException] { + Literal(v) + } + assert(errMsgMap.getMessage.startsWith("Unsupported literal type")) + } + checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2)) + checkUnsupportedTypeInLiteral(("mike", 29, 1.0)) + } } From a885d5bbce9dba66b394850b3aac51ae97cb18dd Mon Sep 17 00:00:00 2001 From: buzhihuojie Date: Wed, 2 Nov 2016 11:36:20 -0700 Subject: [PATCH 0010/1204] [SPARK-17895] Improve doc for rangeBetween and rowsBetween ## What changes were proposed in this pull request? Copied description for row and range based frame boundary from https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala#L56 Added examples to show different behavior of rangeBetween and rowsBetween when involving duplicate values. Please review https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark before opening a pull request. Author: buzhihuojie Closes #15727 from david-weiluo-ren/improveDocForRangeAndRowsBetween. (cherry picked from commit 742e0fea5391857964e90d396641ecf95cac4248) Signed-off-by: Reynold Xin --- .../apache/spark/sql/expressions/Window.scala | 55 +++++++++++++++++++ .../spark/sql/expressions/WindowSpec.scala | 55 +++++++++++++++++++ 2 files changed, 110 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala index 0b26d863cac5d..327bc379d4132 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala @@ -121,6 +121,32 @@ object Window { * and [[Window.currentRow]] to specify special boundary values, rather than using integral * values directly. * + * A row based boundary is based on the position of the row within the partition. + * An offset indicates the number of rows above or below the current row, the frame for the + * current row starts or ends. For instance, given a row based sliding frame with a lower bound + * offset of -1 and a upper bound offset of +2. The frame for row with index 5 would range from + * index 4 to index 6. + * + * {{{ + * import org.apache.spark.sql.expressions.Window + * val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")) + * .toDF("id", "category") + * df.withColumn("sum", + * sum('id) over Window.partitionBy('category).orderBy('id).rowsBetween(0,1)) + * .show() + * + * +---+--------+---+ + * | id|category|sum| + * +---+--------+---+ + * | 1| b| 3| + * | 2| b| 5| + * | 3| b| 3| + * | 1| a| 2| + * | 1| a| 3| + * | 2| a| 2| + * +---+--------+---+ + * }}} + * * @param start boundary start, inclusive. The frame is unbounded if this is * the minimum long value ([[Window.unboundedPreceding]]). * @param end boundary end, inclusive. The frame is unbounded if this is the @@ -144,6 +170,35 @@ object Window { * and [[Window.currentRow]] to specify special boundary values, rather than using integral * values directly. * + * A range based boundary is based on the actual value of the ORDER BY + * expression(s). An offset is used to alter the value of the ORDER BY expression, for + * instance if the current order by expression has a value of 10 and the lower bound offset + * is -3, the resulting lower bound for the current row will be 10 - 3 = 7. 
This however puts a + * number of constraints on the ORDER BY expressions: there can be only one expression and this + * expression must have a numerical data type. An exception can be made when the offset is 0, + * because no value modification is needed, in this case multiple and non-numeric ORDER BY + * expression are allowed. + * + * {{{ + * import org.apache.spark.sql.expressions.Window + * val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")) + * .toDF("id", "category") + * df.withColumn("sum", + * sum('id) over Window.partitionBy('category).orderBy('id).rangeBetween(0,1)) + * .show() + * + * +---+--------+---+ + * | id|category|sum| + * +---+--------+---+ + * | 1| b| 3| + * | 2| b| 5| + * | 3| b| 3| + * | 1| a| 4| + * | 1| a| 4| + * | 2| a| 2| + * +---+--------+---+ + * }}} + * * @param start boundary start, inclusive. The frame is unbounded if this is * the minimum long value ([[Window.unboundedPreceding]]). * @param end boundary end, inclusive. The frame is unbounded if this is the diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala index 1e85b6e7881ad..4a8ce695bd4da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala @@ -89,6 +89,32 @@ class WindowSpec private[sql]( * and [[Window.currentRow]] to specify special boundary values, rather than using integral * values directly. * + * A row based boundary is based on the position of the row within the partition. + * An offset indicates the number of rows above or below the current row, the frame for the + * current row starts or ends. For instance, given a row based sliding frame with a lower bound + * offset of -1 and a upper bound offset of +2. The frame for row with index 5 would range from + * index 4 to index 6. + * + * {{{ + * import org.apache.spark.sql.expressions.Window + * val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")) + * .toDF("id", "category") + * df.withColumn("sum", + * sum('id) over Window.partitionBy('category).orderBy('id).rowsBetween(0,1)) + * .show() + * + * +---+--------+---+ + * | id|category|sum| + * +---+--------+---+ + * | 1| b| 3| + * | 2| b| 5| + * | 3| b| 3| + * | 1| a| 2| + * | 1| a| 3| + * | 2| a| 2| + * +---+--------+---+ + * }}} + * * @param start boundary start, inclusive. The frame is unbounded if this is * the minimum long value ([[Window.unboundedPreceding]]). * @param end boundary end, inclusive. The frame is unbounded if this is the @@ -111,6 +137,35 @@ class WindowSpec private[sql]( * and [[Window.currentRow]] to specify special boundary values, rather than using integral * values directly. * + * A range based boundary is based on the actual value of the ORDER BY + * expression(s). An offset is used to alter the value of the ORDER BY expression, for + * instance if the current order by expression has a value of 10 and the lower bound offset + * is -3, the resulting lower bound for the current row will be 10 - 3 = 7. This however puts a + * number of constraints on the ORDER BY expressions: there can be only one expression and this + * expression must have a numerical data type. An exception can be made when the offset is 0, + * because no value modification is needed, in this case multiple and non-numeric ORDER BY + * expression are allowed. 
+ * + * {{{ + * import org.apache.spark.sql.expressions.Window + * val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")) + * .toDF("id", "category") + * df.withColumn("sum", + * sum('id) over Window.partitionBy('category).orderBy('id).rangeBetween(0,1)) + * .show() + * + * +---+--------+---+ + * | id|category|sum| + * +---+--------+---+ + * | 1| b| 3| + * | 2| b| 5| + * | 3| b| 3| + * | 1| a| 4| + * | 1| a| 4| + * | 2| a| 2| + * +---+--------+---+ + * }}} + * * @param start boundary start, inclusive. The frame is unbounded if this is * the minimum long value ([[Window.unboundedPreceding]]). * @param end boundary end, inclusive. The frame is unbounded if this is the From 0093257ea94d3a197ca061b54c04685d7c1f616a Mon Sep 17 00:00:00 2001 From: Xiangrui Meng Date: Wed, 2 Nov 2016 11:41:49 -0700 Subject: [PATCH 0011/1204] [SPARK-14393][SQL] values generated by non-deterministic functions shouldn't change after coalesce or union ## What changes were proposed in this pull request? When a user appended a column using a "nondeterministic" function to a DataFrame, e.g., `rand`, `randn`, and `monotonically_increasing_id`, the expected semantic is the following: - The value in each row should remain unchanged, as if we materialize the column immediately, regardless of later DataFrame operations. However, since we use `TaskContext.getPartitionId` to get the partition index from the current thread, the values from nondeterministic columns might change if we call `union` or `coalesce` after. `TaskContext.getPartitionId` returns the partition index of the current Spark task, which might not be the corresponding partition index of the DataFrame where we defined the column. See the unit tests below or JIRA for examples. This PR uses the partition index from `RDD.mapPartitionWithIndex` instead of `TaskContext` and fixes the partition initialization logic in whole-stage codegen, normal codegen, and codegen fallback. `initializeStatesForPartition(partitionIndex: Int)` was added to `Projection`, `Nondeterministic`, and `Predicate` (codegen) and initialized right after object creation in `mapPartitionWithIndex`. `newPredicate` now returns a `Predicate` instance rather than a function for proper initialization. ## How was this patch tested? Unit tests. (Actually I'm not very confident that this PR fixed all issues without introducing new ones ...) cc: rxin davies Author: Xiangrui Meng Closes #15567 from mengxr/SPARK-14393. 
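
A rough sketch of the intended semantics (illustrative only; the column and variable names below are just for the example, and it assumes a build that includes this patch): a nondeterministic column keeps its per-row values even if the DataFrame is later coalesced, because the expression is now seeded with the partition index of the plan that defines it rather than the running task's partition id.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.rand

val spark = SparkSession.builder().master("local[4]").appName("spark-14393-sketch").getOrCreate()

// Define a nondeterministic column on a 4-partition Dataset.
val df = spark.range(0, 100, 1, 4).withColumn("r", rand(42))

// Collect the per-row values once...
val before = df.collect().map(row => row.getLong(0) -> row.getDouble(1)).toMap

// ...and again after changing the partitioning. The value of "r" for each id should not
// change, because rand is initialized from the defining plan's partition index.
val after = df.coalesce(2).collect().map(row => row.getLong(0) -> row.getDouble(1)).toMap
assert(before == after)

spark.stop()
```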
(cherry picked from commit 02f203107b8eda1f1576e36c4f12b0e3bc5e910e) Signed-off-by: Reynold Xin --- .../main/scala/org/apache/spark/rdd/RDD.scala | 16 +++++- .../sql/catalyst/expressions/Expression.scala | 19 +++++-- .../catalyst/expressions/InputFileName.scala | 2 +- .../MonotonicallyIncreasingID.scala | 11 ++-- .../sql/catalyst/expressions/Projection.scala | 22 +++++--- .../expressions/SparkPartitionID.scala | 13 +++-- .../expressions/codegen/CodeGenerator.scala | 14 +++++ .../expressions/codegen/CodegenFallback.scala | 18 +++++-- .../codegen/GenerateMutableProjection.scala | 4 ++ .../codegen/GeneratePredicate.scala | 18 +++++-- .../codegen/GenerateSafeProjection.scala | 4 ++ .../codegen/GenerateUnsafeProjection.scala | 4 ++ .../sql/catalyst/expressions/package.scala | 10 +++- .../sql/catalyst/expressions/predicates.scala | 4 -- .../expressions/randomExpressions.scala | 14 ++--- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../expressions/ExpressionEvalHelper.scala | 5 +- .../CodegenExpressionCachingSuite.scala | 13 +++-- .../sql/execution/DataSourceScanExec.scala | 6 ++- .../spark/sql/execution/ExistingRDD.scala | 3 +- .../spark/sql/execution/GenerateExec.scala | 3 +- .../spark/sql/execution/SparkPlan.scala | 4 +- .../sql/execution/WholeStageCodegenExec.scala | 8 ++- .../execution/basicPhysicalOperators.scala | 8 +-- .../columnar/InMemoryTableScanExec.scala | 5 +- .../joins/BroadcastNestedLoopJoinExec.scala | 7 +-- .../joins/CartesianProductExec.scala | 8 +-- .../spark/sql/execution/joins/HashJoin.scala | 2 +- .../execution/joins/SortMergeJoinExec.scala | 2 +- .../apache/spark/sql/execution/objects.scala | 6 ++- .../spark/sql/DataFrameFunctionsSuite.scala | 52 +++++++++++++++++++ .../hive/execution/HiveTableScanExec.scala | 3 +- 32 files changed, 231 insertions(+), 78 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index db535de9e9bb3..e018af35cb18d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -788,14 +788,26 @@ abstract class RDD[T: ClassTag]( } /** - * [performance] Spark's internal mapPartitions method which skips closure cleaning. It is a - * performance API to be used carefully only if we are sure that the RDD elements are + * [performance] Spark's internal mapPartitionsWithIndex method that skips closure cleaning. + * It is a performance API to be used carefully only if we are sure that the RDD elements are * serializable and don't require closure cleaning. * * @param preservesPartitioning indicates whether the input function preserves the partitioner, * which should be `false` unless this is a pair RDD and the input function doesn't modify * the keys. */ + private[spark] def mapPartitionsWithIndexInternal[U: ClassTag]( + f: (Int, Iterator[T]) => Iterator[U], + preservesPartitioning: Boolean = false): RDD[U] = withScope { + new MapPartitionsRDD( + this, + (context: TaskContext, index: Int, iter: Iterator[T]) => f(index, iter), + preservesPartitioning) + } + + /** + * [performance] Spark's internal mapPartitions method that skips closure cleaning. 
+ */ private[spark] def mapPartitionsInternal[U: ClassTag]( f: Iterator[T] => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = withScope { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 9edc1ceff26a7..726a231fd814e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -272,17 +272,28 @@ trait Nondeterministic extends Expression { final override def deterministic: Boolean = false final override def foldable: Boolean = false + @transient private[this] var initialized = false - final def setInitialValues(): Unit = { - initInternal() + /** + * Initializes internal states given the current partition index and mark this as initialized. + * Subclasses should override [[initializeInternal()]]. + */ + final def initialize(partitionIndex: Int): Unit = { + initializeInternal(partitionIndex) initialized = true } - protected def initInternal(): Unit + protected def initializeInternal(partitionIndex: Int): Unit + /** + * @inheritdoc + * Throws an exception if [[initialize()]] is not called yet. + * Subclasses should override [[evalInternal()]]. + */ final override def eval(input: InternalRow = null): Any = { - require(initialized, "nondeterministic expression should be initialized before evaluate") + require(initialized, + s"Nondeterministic expression ${this.getClass.getName} should be initialized before eval.") evalInternal(input) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala index 96929ecf56375..b6c12c5351119 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InputFileName.scala @@ -37,7 +37,7 @@ case class InputFileName() extends LeafExpression with Nondeterministic { override def prettyName: String = "input_file_name" - override protected def initInternal(): Unit = {} + override protected def initializeInternal(partitionIndex: Int): Unit = {} override protected def evalInternal(input: InternalRow): UTF8String = { InputFileNameHolder.getInputFileName() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index 5b4922e0cf2b7..72b8dcca26e2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -50,9 +50,9 @@ case class MonotonicallyIncreasingID() extends LeafExpression with Nondeterminis @transient private[this] var partitionMask: Long = _ - override protected def initInternal(): Unit = { + override protected def initializeInternal(partitionIndex: Int): Unit = { count = 0L - partitionMask = TaskContext.getPartitionId().toLong << 33 + partitionMask = partitionIndex.toLong << 33 } override def nullable: Boolean = false @@ -68,9 +68,10 @@ case class MonotonicallyIncreasingID() extends LeafExpression with Nondeterminis override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val 
countTerm = ctx.freshName("count") val partitionMaskTerm = ctx.freshName("partitionMask") - ctx.addMutableState(ctx.JAVA_LONG, countTerm, s"$countTerm = 0L;") - ctx.addMutableState(ctx.JAVA_LONG, partitionMaskTerm, - s"$partitionMaskTerm = ((long) org.apache.spark.TaskContext.getPartitionId()) << 33;") + ctx.addMutableState(ctx.JAVA_LONG, countTerm, "") + ctx.addMutableState(ctx.JAVA_LONG, partitionMaskTerm, "") + ctx.addPartitionInitializationStatement(s"$countTerm = 0L;") + ctx.addPartitionInitializationStatement(s"$partitionMaskTerm = ((long) partitionIndex) << 33;") ev.copy(code = s""" final ${ctx.javaType(dataType)} ${ev.value} = $partitionMaskTerm + $countTerm; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala index 03e054d098511..476e37e6a9bac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.types.{DataType, StructType} /** * A [[Projection]] that is calculated by calling the `eval` of each of the specified expressions. + * * @param expressions a sequence of expressions that determine the value of each column of the * output row. */ @@ -30,10 +31,12 @@ class InterpretedProjection(expressions: Seq[Expression]) extends Projection { def this(expressions: Seq[Expression], inputSchema: Seq[Attribute]) = this(expressions.map(BindReferences.bindReference(_, inputSchema))) - expressions.foreach(_.foreach { - case n: Nondeterministic => n.setInitialValues() - case _ => - }) + override def initialize(partitionIndex: Int): Unit = { + expressions.foreach(_.foreach { + case n: Nondeterministic => n.initialize(partitionIndex) + case _ => + }) + } // null check is required for when Kryo invokes the no-arg constructor. protected val exprArray = if (expressions != null) expressions.toArray else null @@ -54,6 +57,7 @@ class InterpretedProjection(expressions: Seq[Expression]) extends Projection { /** * A [[MutableProjection]] that is calculated by calling `eval` on each of the specified * expressions. + * * @param expressions a sequence of expressions that determine the value of each column of the * output row. 
*/ @@ -63,10 +67,12 @@ case class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mu private[this] val buffer = new Array[Any](expressions.size) - expressions.foreach(_.foreach { - case n: Nondeterministic => n.setInitialValues() - case _ => - }) + override def initialize(partitionIndex: Int): Unit = { + expressions.foreach(_.foreach { + case n: Nondeterministic => n.initialize(partitionIndex) + case _ => + }) + } private[this] val exprArray = expressions.toArray private[this] var mutableRow: InternalRow = new GenericInternalRow(exprArray.length) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala index 1f675d5b07270..6bef473cac060 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala @@ -17,16 +17,15 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.TaskContext import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.types.{DataType, IntegerType} /** - * Expression that returns the current partition id of the Spark task. + * Expression that returns the current partition id. */ @ExpressionDescription( - usage = "_FUNC_() - Returns the current partition id of the Spark task", + usage = "_FUNC_() - Returns the current partition id", extended = "> SELECT _FUNC_();\n 0") case class SparkPartitionID() extends LeafExpression with Nondeterministic { @@ -38,16 +37,16 @@ case class SparkPartitionID() extends LeafExpression with Nondeterministic { override val prettyName = "SPARK_PARTITION_ID" - override protected def initInternal(): Unit = { - partitionId = TaskContext.getPartitionId() + override protected def initializeInternal(partitionIndex: Int): Unit = { + partitionId = partitionIndex } override protected def evalInternal(input: InternalRow): Int = partitionId override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val idTerm = ctx.freshName("partitionId") - ctx.addMutableState(ctx.JAVA_INT, idTerm, - s"$idTerm = org.apache.spark.TaskContext.getPartitionId();") + ctx.addMutableState(ctx.JAVA_INT, idTerm, "") + ctx.addPartitionInitializationStatement(s"$idTerm = partitionIndex;") ev.copy(code = s"final ${ctx.javaType(dataType)} ${ev.value} = $idTerm;", isNull = "false") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 6cab50ae1bf8d..9c3c6d3b2a7f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -184,6 +184,20 @@ class CodegenContext { splitExpressions(initCodes, "init", Nil) } + /** + * Code statements to initialize states that depend on the partition index. + * An integer `partitionIndex` will be made available within the scope. 
+ */ + val partitionInitializationStatements: mutable.ArrayBuffer[String] = mutable.ArrayBuffer.empty + + def addPartitionInitializationStatement(statement: String): Unit = { + partitionInitializationStatements += statement + } + + def initPartition(): String = { + partitionInitializationStatements.mkString("\n") + } + /** * Holding all the functions those will be added into generated class. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala index 6a5a3e7933eea..0322d1dd6a9ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenFallback.scala @@ -25,15 +25,23 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, No trait CodegenFallback extends Expression { protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - foreach { - case n: Nondeterministic => n.setInitialValues() - case _ => - } - // LeafNode does not need `input` val input = if (this.isInstanceOf[LeafExpression]) "null" else ctx.INPUT_ROW val idx = ctx.references.length ctx.references += this + var childIndex = idx + this.foreach { + case n: Nondeterministic => + // This might add the current expression twice, but it won't hurt. + ctx.references += n + childIndex += 1 + ctx.addPartitionInitializationStatement( + s""" + |((Nondeterministic) references[$childIndex]) + | .initialize(partitionIndex); + """.stripMargin) + case _ => + } val objectTerm = ctx.freshName("obj") val placeHolder = ctx.registerComment(this.toString) if (nullable) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 5c4b56b0b224c..4d732445544a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -111,6 +111,10 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP ${ctx.initMutableStates()} } + public void initialize(int partitionIndex) { + ${ctx.initPartition()} + } + ${ctx.declareAddedFunctions()} public ${classOf[BaseMutableProjection].getName} target(InternalRow row) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 39aa7b17de6c9..dcd1ed96a298e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -25,19 +25,26 @@ import org.apache.spark.sql.catalyst.expressions._ */ abstract class Predicate { def eval(r: InternalRow): Boolean + + /** + * Initializes internal states given the current partition index. + * This is used by nondeterministic expressions to set initial states. + * The default implementation does nothing. 
+ */ + def initialize(partitionIndex: Int): Unit = {} } /** * Generates bytecode that evaluates a boolean [[Expression]] on a given input [[InternalRow]]. */ -object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Boolean] { +object GeneratePredicate extends CodeGenerator[Expression, Predicate] { protected def canonicalize(in: Expression): Expression = ExpressionCanonicalizer.execute(in) protected def bind(in: Expression, inputSchema: Seq[Attribute]): Expression = BindReferences.bindReference(in, inputSchema) - protected def create(predicate: Expression): ((InternalRow) => Boolean) = { + protected def create(predicate: Expression): Predicate = { val ctx = newCodeGenContext() val eval = predicate.genCode(ctx) @@ -55,6 +62,10 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool ${ctx.initMutableStates()} } + public void initialize(int partitionIndex) { + ${ctx.initPartition()} + } + ${ctx.declareAddedFunctions()} public boolean eval(InternalRow ${ctx.INPUT_ROW}) { @@ -67,7 +78,6 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool new CodeAndComment(codeBody, ctx.getPlaceHolderToComments())) logDebug(s"Generated predicate '$predicate':\n${CodeFormatter.format(code)}") - val p = CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate] - (r: InternalRow) => p.eval(r) + CodeGenerator.compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate] } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index 2773e1a666212..b1cb6edefb852 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -173,6 +173,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] ${ctx.initMutableStates()} } + public void initialize(int partitionIndex) { + ${ctx.initPartition()} + } + ${ctx.declareAddedFunctions()} public java.lang.Object apply(java.lang.Object _i) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index 7cc45372daa5a..7e4c9089a2cb9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -380,6 +380,10 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro ${ctx.initMutableStates()} } + public void initialize(int partitionIndex) { + ${ctx.initPartition()} + } + ${ctx.declareAddedFunctions()} // Scala.Function1 need this diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 1510a4796683c..1b00c9e79da22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -64,7 +64,15 @@ package object expressions { * column of the new row. 
If the schema of the input row is specified, then the given expression * will be bound to that schema. */ - abstract class Projection extends (InternalRow => InternalRow) + abstract class Projection extends (InternalRow => InternalRow) { + + /** + * Initializes internal states given the current partition index. + * This is used by nondeterministic expressions to set initial states. + * The default implementation does nothing. + */ + def initialize(partitionIndex: Int): Unit = {} + } /** * Converts a [[InternalRow]] to another Row given a sequence of expression that define each diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 9394e39aadd9d..c941a576d00d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -31,10 +31,6 @@ object InterpretedPredicate { create(BindReferences.bindReference(expression, inputSchema)) def create(expression: Expression): (InternalRow => Boolean) = { - expression.foreach { - case n: Nondeterministic => n.setInitialValues() - case _ => - } (r: InternalRow) => expression.eval(r).asInstanceOf[Boolean] } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index ca200768b2286..e09029f5aab9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -42,8 +42,8 @@ abstract class RDG extends LeafExpression with Nondeterministic { */ @transient protected var rng: XORShiftRandom = _ - override protected def initInternal(): Unit = { - rng = new XORShiftRandom(seed + TaskContext.getPartitionId) + override protected def initializeInternal(partitionIndex: Int): Unit = { + rng = new XORShiftRandom(seed + partitionIndex) } override def nullable: Boolean = false @@ -70,8 +70,9 @@ case class Rand(seed: Long) extends RDG { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rngTerm = ctx.freshName("rng") val className = classOf[XORShiftRandom].getName - ctx.addMutableState(className, rngTerm, - s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());") + ctx.addMutableState(className, rngTerm, "") + ctx.addPartitionInitializationStatement( + s"$rngTerm = new $className(${seed}L + partitionIndex);") ev.copy(code = s""" final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextDouble();""", isNull = "false") } @@ -93,8 +94,9 @@ case class Randn(seed: Long) extends RDG { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rngTerm = ctx.freshName("rng") val className = classOf[XORShiftRandom].getName - ctx.addMutableState(className, rngTerm, - s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());") + ctx.addMutableState(className, rngTerm, "") + ctx.addPartitionInitializationStatement( + s"$rngTerm = new $className(${seed}L + partitionIndex);") ev.copy(code = s""" final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();""", isNull = "false") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e5e2cd7d27d15..b6ad5db74e3c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1060,6 +1060,7 @@ object ConvertToLocalRelation extends Rule[LogicalPlan] { case Project(projectList, LocalRelation(output, data)) if !projectList.exists(hasUnevaluableExpr) => val projection = new InterpretedProjection(projectList, output) + projection.initialize(0) LocalRelation(projectList.map(_.toAttribute), data.map(projection)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index f0c149c02b9aa..9ceb709185417 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -75,7 +75,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { protected def evaluate(expression: Expression, inputRow: InternalRow = EmptyRow): Any = { expression.foreach { - case n: Nondeterministic => n.setInitialValues() + case n: Nondeterministic => n.initialize(0) case _ => } expression.eval(inputRow) @@ -121,6 +121,7 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { val plan = generateProject( GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), expression) + plan.initialize(0) val actual = plan(inputRow).get(0, expression.dataType) if (!checkResult(actual, expected)) { @@ -182,12 +183,14 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { var plan = generateProject( GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), expression) + plan.initialize(0) var actual = plan(inputRow).get(0, expression.dataType) assert(checkResult(actual, expected)) plan = generateProject( GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), expression) + plan.initialize(0) actual = FromUnsafeProjection(expression.dataType :: Nil)( plan(inputRow)).get(0, expression.dataType) assert(checkResult(actual, expected)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenExpressionCachingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenExpressionCachingSuite.scala index 06dc3bd33b90e..fe5cb8eda824f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenExpressionCachingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenExpressionCachingSuite.scala @@ -31,19 +31,22 @@ class CodegenExpressionCachingSuite extends SparkFunSuite { // Use an Add to wrap two of them together in case we only initialize the top level expressions. 
val expr = And(NondeterministicExpression(), NondeterministicExpression()) val instance = UnsafeProjection.create(Seq(expr)) + instance.initialize(0) assert(instance.apply(null).getBoolean(0) === false) } test("GenerateMutableProjection should initialize expressions") { val expr = And(NondeterministicExpression(), NondeterministicExpression()) val instance = GenerateMutableProjection.generate(Seq(expr)) + instance.initialize(0) assert(instance.apply(null).getBoolean(0) === false) } test("GeneratePredicate should initialize expressions") { val expr = And(NondeterministicExpression(), NondeterministicExpression()) val instance = GeneratePredicate.generate(expr) - assert(instance.apply(null) === false) + instance.initialize(0) + assert(instance.eval(null) === false) } test("GenerateUnsafeProjection should not share expression instances") { @@ -73,13 +76,13 @@ class CodegenExpressionCachingSuite extends SparkFunSuite { test("GeneratePredicate should not share expression instances") { val expr1 = MutableExpression() val instance1 = GeneratePredicate.generate(expr1) - assert(instance1.apply(null) === false) + assert(instance1.eval(null) === false) val expr2 = MutableExpression() expr2.mutableState = true val instance2 = GeneratePredicate.generate(expr2) - assert(instance1.apply(null) === false) - assert(instance2.apply(null) === true) + assert(instance1.eval(null) === false) + assert(instance2.eval(null) === true) } } @@ -89,7 +92,7 @@ class CodegenExpressionCachingSuite extends SparkFunSuite { */ case class NondeterministicExpression() extends LeafExpression with Nondeterministic with CodegenFallback { - override protected def initInternal(): Unit = { } + override protected def initializeInternal(partitionIndex: Int): Unit = {} override protected def evalInternal(input: InternalRow): Any = false override def nullable: Boolean = false override def dataType: DataType = BooleanType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index fdd1fa3648251..e485b52b43f76 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -71,8 +71,9 @@ case class RowDataSourceScanExec( val unsafeRow = if (outputUnsafeRows) { rdd } else { - rdd.mapPartitionsInternal { iter => + rdd.mapPartitionsWithIndexInternal { (index, iter) => val proj = UnsafeProjection.create(schema) + proj.initialize(index) iter.map(proj) } } @@ -284,8 +285,9 @@ case class FileSourceScanExec( val unsafeRows = { val scan = inputRDD if (needsUnsafeRowConversion) { - scan.mapPartitionsInternal { iter => + scan.mapPartitionsWithIndexInternal { (index, iter) => val proj = UnsafeProjection.create(schema) + proj.initialize(index) iter.map(proj) } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 455fb5bfbb6f7..aab087cd98716 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -190,8 +190,9 @@ case class RDDScanExec( protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - rdd.mapPartitionsInternal { iter => + rdd.mapPartitionsWithIndexInternal { (index, iter) => val proj = UnsafeProjection.create(schema) + 
proj.initialize(index) iter.map { r => numOutputRows += 1 proj(r) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala index 2663129562660..19fbf0c162048 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala @@ -94,8 +94,9 @@ case class GenerateExec( } val numOutputRows = longMetric("numOutputRows") - rows.mapPartitionsInternal { iter => + rows.mapPartitionsWithIndexInternal { (index, iter) => val proj = UnsafeProjection.create(output, output) + proj.initialize(index) iter.map { r => numOutputRows += 1 proj(r) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 24d0cffef82a2..cadab37a449aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -29,7 +29,7 @@ import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.expressions.codegen.{Predicate => GenPredicate, _} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.metric.SQLMetric @@ -354,7 +354,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ } protected def newPredicate( - expression: Expression, inputSchema: Seq[Attribute]): (InternalRow) => Boolean = { + expression: Expression, inputSchema: Seq[Attribute]): GenPredicate = { GeneratePredicate.generate(expression, inputSchema) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 6303483f22fd3..516b9d5444d31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -331,6 +331,7 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co partitionIndex = index; this.inputs = inputs; ${ctx.initMutableStates()} + ${ctx.initPartition()} } ${ctx.declareAddedFunctions()} @@ -383,10 +384,13 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co } else { // Right now, we support up to two input RDDs. 
rdds.head.zipPartitions(rdds(1)) { (leftIter, rightIter) => - val partitionIndex = TaskContext.getPartitionId() + Iterator((leftIter, rightIter)) + // a small hack to obtain the correct partition index + }.mapPartitionsWithIndex { (index, zippedIter) => + val (leftIter, rightIter) = zippedIter.next() val clazz = CodeGenerator.compile(cleanedSource) val buffer = clazz.generate(references).asInstanceOf[BufferedRowIterator] - buffer.init(partitionIndex, Array(leftIter, rightIter)) + buffer.init(index, Array(leftIter, rightIter)) new Iterator[InternalRow] { override def hasNext: Boolean = { val v = buffer.hasNext diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index a5291e0c12f88..32133f52630cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -70,9 +70,10 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) } protected override def doExecute(): RDD[InternalRow] = { - child.execute().mapPartitionsInternal { iter => + child.execute().mapPartitionsWithIndexInternal { (index, iter) => val project = UnsafeProjection.create(projectList, child.output, subexpressionEliminationEnabled) + project.initialize(index) iter.map(project) } } @@ -205,10 +206,11 @@ case class FilterExec(condition: Expression, child: SparkPlan) protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - child.execute().mapPartitionsInternal { iter => + child.execute().mapPartitionsWithIndexInternal { (index, iter) => val predicate = newPredicate(condition, child.output) + predicate.initialize(0) iter.filter { row => - val r = predicate(row) + val r = predicate.eval(row) if (r) numOutputRows += 1 r } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala index b87016d5a5696..9028caa446e8c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala @@ -132,10 +132,11 @@ case class InMemoryTableScanExec( val relOutput: AttributeSeq = relation.output val buffers = relation.cachedColumnBuffers - buffers.mapPartitionsInternal { cachedBatchIterator => + buffers.mapPartitionsWithIndexInternal { (index, cachedBatchIterator) => val partitionFilter = newPredicate( partitionFilters.reduceOption(And).getOrElse(Literal(true)), schema) + partitionFilter.initialize(index) // Find the ordinals and data types of the requested columns. 
val (requestedColumnIndices, requestedColumnDataTypes) = @@ -147,7 +148,7 @@ case class InMemoryTableScanExec( val cachedBatchesToScan = if (inMemoryPartitionPruningEnabled) { cachedBatchIterator.filter { cachedBatch => - if (!partitionFilter(cachedBatch.stats)) { + if (!partitionFilter.eval(cachedBatch.stats)) { def statsString: String = schemaIndex.map { case (a, i) => val value = cachedBatch.stats.get(i, a.dataType) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala index bfe7e3dea45df..f526a19876670 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala @@ -52,7 +52,7 @@ case class BroadcastNestedLoopJoinExec( UnspecifiedDistribution :: BroadcastDistribution(IdentityBroadcastMode) :: Nil } - private[this] def genResultProjection: InternalRow => InternalRow = joinType match { + private[this] def genResultProjection: UnsafeProjection = joinType match { case LeftExistence(j) => UnsafeProjection.create(output, output) case other => @@ -84,7 +84,7 @@ case class BroadcastNestedLoopJoinExec( @transient private lazy val boundCondition = { if (condition.isDefined) { - newPredicate(condition.get, streamed.output ++ broadcast.output) + newPredicate(condition.get, streamed.output ++ broadcast.output).eval _ } else { (r: InternalRow) => true } @@ -366,8 +366,9 @@ case class BroadcastNestedLoopJoinExec( } val numOutputRows = longMetric("numOutputRows") - resultRdd.mapPartitionsInternal { iter => + resultRdd.mapPartitionsWithIndexInternal { (index, iter) => val resultProj = genResultProjection + resultProj.initialize(index) iter.map { r => numOutputRows += 1 resultProj(r) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala index 15dc9b40662e2..8341fe2ffd078 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala @@ -98,15 +98,15 @@ case class CartesianProductExec( val rightResults = right.execute().asInstanceOf[RDD[UnsafeRow]] val pair = new UnsafeCartesianRDD(leftResults, rightResults, right.output.size) - pair.mapPartitionsInternal { iter => + pair.mapPartitionsWithIndexInternal { (index, iter) => val joiner = GenerateUnsafeRowJoiner.create(left.schema, right.schema) val filtered = if (condition.isDefined) { - val boundCondition: (InternalRow) => Boolean = - newPredicate(condition.get, left.output ++ right.output) + val boundCondition = newPredicate(condition.get, left.output ++ right.output) + boundCondition.initialize(index) val joined = new JoinedRow iter.filter { r => - boundCondition(joined(r._1, r._2)) + boundCondition.eval(joined(r._1, r._2)) } } else { iter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index 05c5e2f4cd77b..1aef5f6864263 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -81,7 +81,7 @@ trait HashJoin { UnsafeProjection.create(streamedKeys) @transient 
private[this] lazy val boundCondition = if (condition.isDefined) { - newPredicate(condition.get, streamedPlan.output ++ buildPlan.output) + newPredicate(condition.get, streamedPlan.output ++ buildPlan.output).eval _ } else { (r: InternalRow) => true } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index ecf7cf289f034..ca9c0ed8cec32 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -101,7 +101,7 @@ case class SortMergeJoinExec( left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) => val boundCondition: (InternalRow) => Boolean = { condition.map { cond => - newPredicate(cond, left.output ++ right.output) + newPredicate(cond, left.output ++ right.output).eval _ }.getOrElse { (r: InternalRow) => true } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala index 9df56bbf1ef87..fde3b2a528994 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/objects.scala @@ -87,8 +87,9 @@ case class DeserializeToObjectExec( } override protected def doExecute(): RDD[InternalRow] = { - child.execute().mapPartitionsInternal { iter => + child.execute().mapPartitionsWithIndexInternal { (index, iter) => val projection = GenerateSafeProjection.generate(deserializer :: Nil, child.output) + projection.initialize(index) iter.map(projection) } } @@ -124,8 +125,9 @@ case class SerializeFromObjectExec( } override protected def doExecute(): RDD[InternalRow] = { - child.execute().mapPartitionsInternal { iter => + child.execute().mapPartitionsWithIndexInternal { (index, iter) => val projection = UnsafeProjection.create(serializer) + projection.initialize(index) iter.map(projection) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 586a0fffeb7a1..0e9a2c6cf7dec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -19,7 +19,13 @@ package org.apache.spark.sql import java.nio.charset.StandardCharsets +import scala.util.Random + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -406,4 +412,50 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(true), Row(true)) ) } + + private def assertValuesDoNotChangeAfterCoalesceOrUnion(v: Column): Unit = { + import DataFrameFunctionsSuite.CodegenFallbackExpr + for ((codegenFallback, wholeStage) <- Seq((true, false), (false, false), (false, true))) { + val c = if (codegenFallback) { + Column(CodegenFallbackExpr(v.expr)) + } else { + v + } + withSQLConf( + (SQLConf.WHOLESTAGE_FALLBACK.key, codegenFallback.toString), + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString)) { + val df = spark.range(0, 4, 1, 4).withColumn("c", c) + val 
rows = df.collect() + val rowsAfterCoalesce = df.coalesce(2).collect() + assert(rows === rowsAfterCoalesce, "Values changed after coalesce when " + + s"codegenFallback=$codegenFallback and wholeStage=$wholeStage.") + + val df1 = spark.range(0, 2, 1, 2).withColumn("c", c) + val rows1 = df1.collect() + val df2 = spark.range(2, 4, 1, 2).withColumn("c", c) + val rows2 = df2.collect() + val rowsAfterUnion = df1.union(df2).collect() + assert(rowsAfterUnion === rows1 ++ rows2, "Values changed after union when " + + s"codegenFallback=$codegenFallback and wholeStage=$wholeStage.") + } + } + } + + test("SPARK-14393: values generated by non-deterministic functions shouldn't change after " + + "coalesce or union") { + Seq( + monotonically_increasing_id(), spark_partition_id(), + rand(Random.nextLong()), randn(Random.nextLong()) + ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) + } +} + +object DataFrameFunctionsSuite { + case class CodegenFallbackExpr(child: Expression) extends Expression with CodegenFallback { + override def children: Seq[Expression] = Seq(child) + override def nullable: Boolean = child.nullable + override def dataType: DataType = child.dataType + override lazy val resolved = true + override def eval(input: InternalRow): Any = child.eval(input) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index 231f204b12b47..c80695bd3e0fe 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -154,8 +154,9 @@ case class HiveTableScanExec( val numOutputRows = longMetric("numOutputRows") // Avoid to serialize MetastoreRelation because schema is lazy. (see SPARK-15649) val outputSchema = schema - rdd.mapPartitionsInternal { iter => + rdd.mapPartitionsWithIndexInternal { (index, iter) => val proj = UnsafeProjection.create(outputSchema) + proj.initialize(index) iter.map { r => numOutputRows += 1 proj(r)

From bd3ea6595788a4fe5399e6c6c666618d8cb6872c Mon Sep 17 00:00:00 2001
From: Jeff Zhang
Date: Wed, 2 Nov 2016 11:47:45 -0700
Subject: [PATCH 0012/1204] [SPARK-18160][CORE][YARN] spark.files & spark.jars should not be passed to driver in yarn mode

## What changes were proposed in this pull request?

spark.files is still passed to the driver in yarn mode, so SparkContext will still handle it, which causes the error described in the JIRA.

## How was this patch tested?

Tested manually on a 5-node cluster. Because the issue only shows up on a multi-node cluster, I didn't write an automated test for it.

Author: Jeff Zhang

Closes #15669 from zjffdu/SPARK-18160.
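The essence of the fix is to strip these two properties from the `SparkConf` that the YARN client constructs, because `SparkSubmit` has already arranged for the corresponding files to be shipped through the YARN distributed cache. A minimal sketch of that idea, assuming a standalone helper (`YarnClientConfSanitizer` is illustrative and not part of the patch):

```scala
import org.apache.spark.SparkConf

object YarnClientConfSanitizer {
  // Keys whose contents SparkSubmit already distributes through the YARN cache
  // in yarn mode; the driver-side SparkContext must not handle them again.
  private val cacheManagedKeys = Seq("spark.jars", "spark.files")

  def strip(conf: SparkConf): SparkConf = {
    // SparkConf.remove(key) drops the entry and returns the conf itself.
    cacheManagedKeys.foreach(conf.remove)
    conf
  }
}
```

The actual change in `Client.scala` below does the same thing inline, with two `remove` calls right after the `SparkConf` is created.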
(cherry picked from commit 3c24299b71e23e159edbb972347b13430f92a465) Signed-off-by: Marcelo Vanzin --- .../scala/org/apache/spark/SparkContext.scala | 29 ++++--------------- .../org/apache/spark/deploy/yarn/Client.scala | 5 +++- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 4694790c72cd8..63478c88b057b 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1716,29 +1716,12 @@ class SparkContext(config: SparkConf) extends Logging { key = uri.getScheme match { // A JAR file which exists only on the driver node case null | "file" => - if (master == "yarn" && deployMode == "cluster") { - // In order for this to work in yarn cluster mode the user must specify the - // --addJars option to the client to upload the file into the distributed cache - // of the AM to make it show up in the current working directory. - val fileName = new Path(uri.getPath).getName() - try { - env.rpcEnv.fileServer.addJar(new File(fileName)) - } catch { - case e: Exception => - // For now just log an error but allow to go through so spark examples work. - // The spark examples don't really need the jar distributed since its also - // the app jar. - logError("Error adding jar (" + e + "), was the --addJars option used?") - null - } - } else { - try { - env.rpcEnv.fileServer.addJar(new File(uri.getPath)) - } catch { - case exc: FileNotFoundException => - logError(s"Jar not found at $path") - null - } + try { + env.rpcEnv.fileServer.addJar(new File(uri.getPath)) + } catch { + case exc: FileNotFoundException => + logError(s"Jar not found at $path") + null } // A JAR file which exists locally on every worker node case "local" => diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 55e4a833b6707..053a78617d4e0 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1202,7 +1202,10 @@ private object Client extends Logging { // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes System.setProperty("SPARK_YARN_MODE", "true") val sparkConf = new SparkConf - + // SparkSubmit would use yarn cache to distribute files & jars in yarn mode, + // so remove them from sparkConf here for yarn mode. + sparkConf.remove("spark.jars") + sparkConf.remove("spark.files") val args = new ClientArguments(argStrings) new Client(args, sparkConf).run() } From 1eef8e5cd09dfb8b77044ef9864321618e8ea8c8 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 2 Nov 2016 11:52:29 -0700 Subject: [PATCH 0013/1204] [SPARK-17058][BUILD] Add maven snapshots-and-staging profile to build/test against staging artifacts ## What changes were proposed in this pull request? Adds a `snapshots-and-staging profile` so that RCs of projects like Hadoop and HBase can be used in developer-only build and test runs. There's a comment above the profile telling people not to use this in production. There's no attempt to do the same for SBT, as Ivy is different. ## How was this patch tested? Tested by building against the Hadoop 2.7.3 RC 1 JARs without the profile (and without any local copy of the 2.7.3 artifacts), the build failed ``` mvn install -DskipTests -Pyarn,hadoop-2.7,hive -Dhadoop.version=2.7.3 ... 
[INFO] ------------------------------------------------------------------------ [INFO] Building Spark Project Launcher 2.1.0-SNAPSHOT [INFO] ------------------------------------------------------------------------ Downloading: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client/2.7.3/hadoop-client-2.7.3.pom [WARNING] The POM for org.apache.hadoop:hadoop-client:jar:2.7.3 is missing, no dependency information available Downloading: https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-client/2.7.3/hadoop-client-2.7.3.jar [INFO] ------------------------------------------------------------------------ [INFO] Reactor Summary: [INFO] [INFO] Spark Project Parent POM ........................... SUCCESS [ 4.482 s] [INFO] Spark Project Tags ................................. SUCCESS [ 17.402 s] [INFO] Spark Project Sketch ............................... SUCCESS [ 11.252 s] [INFO] Spark Project Networking ........................... SUCCESS [ 13.458 s] [INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [ 9.043 s] [INFO] Spark Project Unsafe ............................... SUCCESS [ 16.027 s] [INFO] Spark Project Launcher ............................. FAILURE [ 1.653 s] [INFO] Spark Project Core ................................. SKIPPED ... ``` With the profile, the build completed ``` mvn install -DskipTests -Pyarn,hadoop-2.7,hive,snapshots-and-staging -Dhadoop.version=2.7.3 ``` Author: Steve Loughran Closes #14646 from steveloughran/stevel/SPARK-17058-support-asf-snapshots. (cherry picked from commit 37d95227a21de602b939dae84943ba007f434513) Signed-off-by: Reynold Xin --- pom.xml | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/pom.xml b/pom.xml index aaf7cfa7eb2ad..04d2eaa1d3bac 100644 --- a/pom.xml +++ b/pom.xml @@ -2693,6 +2693,54 @@ + + + snapshots-and-staging + + + https://repository.apache.org/content/groups/staging/ + https://repository.apache.org/content/repositories/snapshots/ + + + + + ASF Staging + ${asf.staging} + + + ASF Snapshots + ${asf.snapshots} + + true + + + false + + + + + + + ASF Staging + ${asf.staging} + + + ASF Snapshots + ${asf.snapshots} + + true + + + false + + + + + + + org.json + json + From be3933ddfa3b6b6cf458c0fc4865a61fef40e76a Mon Sep 17 00:00:00 2001 From: Michael Allman Date: Thu, 10 Nov 2016 13:41:13 -0800 Subject: [PATCH 0084/1204] [SPARK-17993][SQL] Fix Parquet log output redirection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (Link to Jira issue: https://issues.apache.org/jira/browse/SPARK-17993) ## What changes were proposed in this pull request? PR #14690 broke parquet log output redirection for converted partitioned Hive tables. 
For example, when querying parquet files written by Parquet-mr 1.6.0 Spark prints a torrent of (harmless) warning messages from the Parquet reader: ``` Oct 18, 2016 7:42:18 PM WARNING: org.apache.parquet.CorruptStatistics: Ignoring statistics because created_by could not be parsed (see PARQUET-251): parquet-mr version 1.6.0 org.apache.parquet.VersionParser$VersionParseException: Could not parse created_by: parquet-mr version 1.6.0 using format: (.+) version ((.*) )?\(build ?(.*)\) at org.apache.parquet.VersionParser.parse(VersionParser.java:112) at org.apache.parquet.CorruptStatistics.shouldIgnoreStatistics(CorruptStatistics.java:60) at org.apache.parquet.format.converter.ParquetMetadataConverter.fromParquetStatistics(ParquetMetadataConverter.java:263) at org.apache.parquet.hadoop.ParquetFileReader$Chunk.readAllPages(ParquetFileReader.java:583) at org.apache.parquet.hadoop.ParquetFileReader.readNextRowGroup(ParquetFileReader.java:513) at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.checkEndOfRowGroup(VectorizedParquetRecordReader.java:270) at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextBatch(VectorizedParquetRecordReader.java:225) at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:137) at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:102) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:162) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:102) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:372) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:231) at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:225) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:803) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:803) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319) at org.apache.spark.rdd.RDD.iterator(RDD.scala:283) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) ``` This only happens during execution, not planning, and it doesn't matter what log level the `SparkContext` is set to. That's because Parquet (versions < 1.9) doesn't use slf4j for logging. Note, you can tell that log redirection is not working here because the log message format does not conform to the default Spark log message format. 
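The redirection mechanism the `ParquetLogRedirector` class added by this patch relies on is to detach the handlers from the JUL logger that parquet-mr configures and install SLF4J's bridge handler in their place, so that Spark's log4j configuration applies again. A minimal standalone sketch of that mechanism in Scala (the object name is illustrative; `"parquet"` is the logger namespace used by parquet-mr 1.6):

```scala
import java.util.logging.{Logger => JulLogger}

import org.slf4j.bridge.SLF4JBridgeHandler

object JulToSlf4jRedirect {
  // Hold a strong reference: JUL loggers that are only weakly referenced can be
  // garbage collected, silently discarding the handler setup done below.
  private val parquetJulLogger: JulLogger = JulLogger.getLogger("parquet")

  def redirect(): Unit = {
    // Drop parquet-mr's default handlers, which write directly to stdout ...
    parquetJulLogger.getHandlers.foreach(parquetJulLogger.removeHandler)
    parquetJulLogger.setUseParentHandlers(false)
    // ... and route the records through SLF4J instead.
    parquetJulLogger.addHandler(new SLF4JBridgeHandler)
  }
}
```

The tricky part, addressed by the class added below, is making sure this runs wherever Parquet code actually executes, including on executors that never construct a `ParquetFileFormat` directly but deserialize its `OutputWriterFactory`.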
This is a regression I noted as something we needed to fix as a follow up. It appears that the problem arose because we removed the call to `inferSchema` during Hive table conversion. That call is what triggered the output redirection. ## How was this patch tested? I tested this manually in four ways: 1. Executing `spark.sqlContext.range(10).selectExpr("id as a").write.mode("overwrite").parquet("test")`. 2. Executing `spark.read.format("parquet").load(legacyParquetFile).show` for a Parquet file `legacyParquetFile` written using Parquet-mr 1.6.0. 3. Executing `select * from legacy_parquet_table limit 1` for some unpartitioned Parquet-based Hive table written using Parquet-mr 1.6.0. 4. Executing `select * from legacy_partitioned_parquet_table where partcol=x limit 1` for some partitioned Parquet-based Hive table written using Parquet-mr 1.6.0. I ran each test with a new instance of `spark-shell` or `spark-sql`. Incidentally, I found that test case 3 was not a regression—redirection was not occurring in the master codebase prior to #14690. I spent some time working on a unit test, but based on my experience working on this ticket I feel that automated testing here is far from feasible. cc ericl dongjoon-hyun Author: Michael Allman Closes #15538 from mallman/spark-17993-fix_parquet_log_redirection. (cherry picked from commit b533fa2b205544b42dcebe0a6fee9d8275f6da7d) Signed-off-by: Reynold Xin --- .../parquet/ParquetLogRedirector.java | 72 +++++++++++++++++++ .../parquet/ParquetFileFormat.scala | 58 ++++----------- sql/core/src/test/resources/log4j.properties | 4 +- sql/hive/src/test/resources/log4j.properties | 4 ++ 4 files changed, 90 insertions(+), 48 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java new file mode 100644 index 0000000000000..7a7f32ee1e87b --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetLogRedirector.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet; + +import java.io.Serializable; +import java.util.logging.Handler; +import java.util.logging.Logger; + +import org.apache.parquet.Log; +import org.slf4j.bridge.SLF4JBridgeHandler; + +// Redirects the JUL logging for parquet-mr versions <= 1.8 to SLF4J logging using +// SLF4JBridgeHandler. 
Parquet-mr versions >= 1.9 use SLF4J directly +final class ParquetLogRedirector implements Serializable { + // Client classes should hold a reference to INSTANCE to ensure redirection occurs. This is + // especially important for Serializable classes where fields are set but constructors are + // ignored + static final ParquetLogRedirector INSTANCE = new ParquetLogRedirector(); + + // JUL loggers must be held by a strong reference, otherwise they may get destroyed by GC. + // However, the root JUL logger used by Parquet isn't properly referenced. Here we keep + // references to loggers in both parquet-mr <= 1.6 and 1.7/1.8 + private static final Logger apacheParquetLogger = + Logger.getLogger(Log.class.getPackage().getName()); + private static final Logger parquetLogger = Logger.getLogger("parquet"); + + static { + // For parquet-mr 1.7 and 1.8, which are under `org.apache.parquet` namespace. + try { + Class.forName(Log.class.getName()); + redirect(Logger.getLogger(Log.class.getPackage().getName())); + } catch (ClassNotFoundException ex) { + throw new RuntimeException(ex); + } + + // For parquet-mr 1.6.0 and lower versions bundled with Hive, which are under `parquet` + // namespace. + try { + Class.forName("parquet.Log"); + redirect(Logger.getLogger("parquet")); + } catch (Throwable t) { + // SPARK-9974: com.twitter:parquet-hadoop-bundle:1.6.0 is not packaged into the assembly + // when Spark is built with SBT. So `parquet.Log` may not be found. This try/catch block + // should be removed after this issue is fixed. + } + } + + private ParquetLogRedirector() { + } + + private static void redirect(Logger logger) { + for (Handler handler : logger.getHandlers()) { + logger.removeHandler(handler); + } + logger.setUseParentHandlers(false); + logger.addHandler(new SLF4JBridgeHandler()); + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index b8ea7f40c4ab3..031a0fe57893f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.datasources.parquet import java.net.URI -import java.util.logging.{Logger => JLogger} import scala.collection.JavaConverters._ import scala.collection.mutable @@ -29,14 +28,12 @@ import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl -import org.apache.parquet.{Log => ApacheParquetLog} import org.apache.parquet.filter2.compat.FilterCompat import org.apache.parquet.filter2.predicate.FilterApi import org.apache.parquet.hadoop._ import org.apache.parquet.hadoop.codec.CodecConfig import org.apache.parquet.hadoop.util.ContextUtil import org.apache.parquet.schema.MessageType -import org.slf4j.bridge.SLF4JBridgeHandler import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.internal.Logging @@ -56,6 +53,11 @@ class ParquetFileFormat with DataSourceRegister with Logging with Serializable { + // Hold a reference to the (serializable) singleton instance of ParquetLogRedirector. This + // ensures the ParquetLogRedirector class is initialized whether an instance of ParquetFileFormat + // is constructed or deserialized. 
Do not heed the Scala compiler's warning about an unused field + // here. + private val parquetLogRedirector = ParquetLogRedirector.INSTANCE override def shortName(): String = "parquet" @@ -129,10 +131,14 @@ class ParquetFileFormat conf.setBoolean(ParquetOutputFormat.ENABLE_JOB_SUMMARY, false) } - ParquetFileFormat.redirectParquetLogs() - new OutputWriterFactory { - override def newInstance( + // This OutputWriterFactory instance is deserialized when writing Parquet files on the + // executor side without constructing or deserializing ParquetFileFormat. Therefore, we hold + // another reference to ParquetLogRedirector.INSTANCE here to ensure the latter class is + // initialized. + private val parquetLogRedirector = ParquetLogRedirector.INSTANCE + + override def newInstance( path: String, dataSchema: StructType, context: TaskAttemptContext): OutputWriter = { @@ -673,44 +679,4 @@ object ParquetFileFormat extends Logging { Failure(cause) }.toOption } - - // JUL loggers must be held by a strong reference, otherwise they may get destroyed by GC. - // However, the root JUL logger used by Parquet isn't properly referenced. Here we keep - // references to loggers in both parquet-mr <= 1.6 and >= 1.7 - val apacheParquetLogger: JLogger = JLogger.getLogger(classOf[ApacheParquetLog].getPackage.getName) - val parquetLogger: JLogger = JLogger.getLogger("parquet") - - // Parquet initializes its own JUL logger in a static block which always prints to stdout. Here - // we redirect the JUL logger via SLF4J JUL bridge handler. - val redirectParquetLogsViaSLF4J: Unit = { - def redirect(logger: JLogger): Unit = { - logger.getHandlers.foreach(logger.removeHandler) - logger.setUseParentHandlers(false) - logger.addHandler(new SLF4JBridgeHandler) - } - - // For parquet-mr 1.7.0 and above versions, which are under `org.apache.parquet` namespace. - // scalastyle:off classforname - Class.forName(classOf[ApacheParquetLog].getName) - // scalastyle:on classforname - redirect(JLogger.getLogger(classOf[ApacheParquetLog].getPackage.getName)) - - // For parquet-mr 1.6.0 and lower versions bundled with Hive, which are under `parquet` - // namespace. - try { - // scalastyle:off classforname - Class.forName("parquet.Log") - // scalastyle:on classforname - redirect(JLogger.getLogger("parquet")) - } catch { case _: Throwable => - // SPARK-9974: com.twitter:parquet-hadoop-bundle:1.6.0 is not packaged into the assembly - // when Spark is built with SBT. So `parquet.Log` may not be found. This try/catch block - // should be removed after this issue is fixed. - } - } - - /** - * ParquetFileFormat.prepareWrite calls this function to initialize `redirectParquetLogsViaSLF4J`. 
- */ - def redirectParquetLogs(): Unit = {} } diff --git a/sql/core/src/test/resources/log4j.properties b/sql/core/src/test/resources/log4j.properties index 33b9ecf1e2826..25b817382195a 100644 --- a/sql/core/src/test/resources/log4j.properties +++ b/sql/core/src/test/resources/log4j.properties @@ -53,5 +53,5 @@ log4j.additivity.hive.ql.metadata.Hive=false log4j.logger.hive.ql.metadata.Hive=OFF # Parquet related logging -log4j.logger.org.apache.parquet.hadoop=WARN -log4j.logger.org.apache.spark.sql.parquet=INFO +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR diff --git a/sql/hive/src/test/resources/log4j.properties b/sql/hive/src/test/resources/log4j.properties index fea3404769d9d..072bb25d30a87 100644 --- a/sql/hive/src/test/resources/log4j.properties +++ b/sql/hive/src/test/resources/log4j.properties @@ -59,3 +59,7 @@ log4j.logger.hive.ql.metadata.Hive=OFF log4j.additivity.org.apache.hadoop.hive.ql.io.RCFile=false log4j.logger.org.apache.hadoop.hive.ql.io.RCFile=ERROR + +# Parquet related logging +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR

From c602894f25bf9e61b759815674008471858cc71e Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Thu, 10 Nov 2016 13:42:48 -0800
Subject: [PATCH 0085/1204] [SPARK-17990][SPARK-18302][SQL] correct several partition related behaviours of ExternalCatalog

## What changes were proposed in this pull request?

This PR corrects several partition-related behaviors of `ExternalCatalog`:

1. The default partition location should not always lowercase the partition column names in the path string (fix `HiveExternalCatalog`).
2. Renaming a partition should not always lowercase the partition column names in the updated partition path string (fix `HiveExternalCatalog`).
3. Renaming a partition should update the partition location only for managed tables (fix `InMemoryCatalog`).
4. Creating a partition over an existing directory should succeed (fix `InMemoryCatalog`).
5. Creating a partition over a non-existing directory should create that directory (fix `InMemoryCatalog`).
6. Dropping a partition from an external table should not delete the directory (fix `InMemoryCatalog`).

## How was this patch tested?

New tests in `ExternalCatalogSuite`.

Author: Wenchen Fan

Closes #15797 from cloud-fan/partition.
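Points 1 and 2 come down to how the default partition location is derived from a partition spec: the path components must keep the original case of the partition column names rather than a lowercased form of them. A rough sketch of that construction, leaving out the Hive-style escaping that the new `ExternalCatalogUtils.generatePartitionPath` helper in the diff below also performs (`PartitionPaths` is an illustrative name):

```scala
import org.apache.hadoop.fs.Path

object PartitionPaths {
  // Builds <tablePath>/<col1>=<v1>/<col2>=<v2>, preserving the case of the
  // partition column names. The real helper additionally percent-escapes
  // special characters and substitutes __HIVE_DEFAULT_PARTITION__ for nulls.
  def defaultPartitionPath(
      spec: Map[String, String],
      partitionColumnNames: Seq[String],
      tablePath: Path): Path = {
    partitionColumnNames.foldLeft(tablePath) { (parent, col) =>
      new Path(parent, s"$col=${spec(col)}")
    }
  }
}
```

For a spec of `Map("partCol1" -> "1", "partCol2" -> "2")` under `/warehouse/tbl`, this yields `/warehouse/tbl/partCol1=1/partCol2=2`, which is the layout the new `ExternalCatalogSuite` tests assert.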
(cherry picked from commit 2f7461f31331cfc37f6cfa3586b7bbefb3af5547) Signed-off-by: Reynold Xin --- .../catalog/ExternalCatalogUtils.scala | 121 ++++++++++++++ .../catalyst/catalog/InMemoryCatalog.scala | 92 +++++------ .../sql/catalyst/catalog/interface.scala | 11 ++ .../catalog/ExternalCatalogSuite.scala | 150 ++++++++++++++---- .../catalog/SessionCatalogSuite.scala | 24 ++- .../spark/sql/execution/command/ddl.scala | 8 +- .../spark/sql/execution/command/tables.scala | 3 +- .../datasources/CatalogFileIndex.scala | 2 +- .../datasources/DataSourceStrategy.scala | 2 +- .../datasources/FileFormatWriter.scala | 6 +- .../PartitioningAwareFileIndex.scala | 2 - .../datasources/PartitioningUtils.scala | 94 +---------- .../sql/execution/command/DDLSuite.scala | 8 +- .../ParquetPartitionDiscoverySuite.scala | 21 +-- .../spark/sql/hive/HiveExternalCatalog.scala | 51 +++++- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 4 +- .../spark/sql/hive/MultiDatabaseSuite.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 2 +- .../sql/hive/execution/SQLQuerySuite.scala | 2 +- 19 files changed, 397 insertions(+), 208 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala new file mode 100644 index 0000000000000..b1442eec164d8 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.util.Shell + +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec + +object ExternalCatalogUtils { + // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't + // depend on Hive. + val DEFAULT_PARTITION_NAME = "__HIVE_DEFAULT_PARTITION__" + + ////////////////////////////////////////////////////////////////////////////////////////////////// + // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). + ////////////////////////////////////////////////////////////////////////////////////////////////// + + val charToEscape = { + val bitSet = new java.util.BitSet(128) + + /** + * ASCII 01-1F are HTTP control characters that need to be escaped. + * \u000A and \u000D are \n and \r, respectively. 
+ */ + val clist = Array( + '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', + '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', + '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', + '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', + '{', '[', ']', '^') + + clist.foreach(bitSet.set(_)) + + if (Shell.WINDOWS) { + Array(' ', '<', '>', '|').foreach(bitSet.set(_)) + } + + bitSet + } + + def needsEscaping(c: Char): Boolean = { + c >= 0 && c < charToEscape.size() && charToEscape.get(c) + } + + def escapePathName(path: String): String = { + val builder = new StringBuilder() + path.foreach { c => + if (needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02X") + } else { + builder.append(c) + } + } + + builder.toString() + } + + + def unescapePathName(path: String): String = { + val sb = new StringBuilder + var i = 0 + + while (i < path.length) { + val c = path.charAt(i) + if (c == '%' && i + 2 < path.length) { + val code: Int = try { + Integer.parseInt(path.substring(i + 1, i + 3), 16) + } catch { + case _: Exception => -1 + } + if (code >= 0) { + sb.append(code.asInstanceOf[Char]) + i += 3 + } else { + sb.append(c) + i += 1 + } + } else { + sb.append(c) + i += 1 + } + } + + sb.toString() + } + + def generatePartitionPath( + spec: TablePartitionSpec, + partitionColumnNames: Seq[String], + tablePath: Path): Path = { + val partitionPathStrings = partitionColumnNames.map { col => + val partitionValue = spec(col) + val partitionString = if (partitionValue == null) { + DEFAULT_PARTITION_NAME + } else { + escapePathName(partitionValue) + } + escapePathName(col) + "=" + partitionString + } + partitionPathStrings.foldLeft(tablePath) { (totalPath, nextPartPath) => + new Path(totalPath, nextPartPath) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 20db81e6f9060..a3ffeaa63f690 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -231,7 +231,7 @@ class InMemoryCatalog( assert(tableMeta.storage.locationUri.isDefined, "Managed table should always have table location, as we will assign a default location " + "to it if it doesn't have one.") - val dir = new Path(tableMeta.storage.locationUri.get) + val dir = new Path(tableMeta.location) try { val fs = dir.getFileSystem(hadoopConfig) fs.delete(dir, true) @@ -259,7 +259,7 @@ class InMemoryCatalog( assert(oldDesc.table.storage.locationUri.isDefined, "Managed table should always have table location, as we will assign a default location " + "to it if it doesn't have one.") - val oldDir = new Path(oldDesc.table.storage.locationUri.get) + val oldDir = new Path(oldDesc.table.location) val newDir = new Path(catalog(db).db.locationUri, newName) try { val fs = oldDir.getFileSystem(hadoopConfig) @@ -355,25 +355,28 @@ class InMemoryCatalog( } } - val tableDir = new Path(catalog(db).db.locationUri, table) - val partitionColumnNames = getTable(db, table).partitionColumnNames + val tableMeta = getTable(db, table) + val partitionColumnNames = tableMeta.partitionColumnNames + val tablePath = new Path(tableMeta.location) // TODO: we should follow hive to roll back if one partition path 
failed to create. parts.foreach { p => - // If location is set, the partition is using an external partition location and we don't - // need to handle its directory. - if (p.storage.locationUri.isEmpty) { - val partitionPath = partitionColumnNames.flatMap { col => - p.spec.get(col).map(col + "=" + _) - }.mkString("/") - try { - val fs = tableDir.getFileSystem(hadoopConfig) - fs.mkdirs(new Path(tableDir, partitionPath)) - } catch { - case e: IOException => - throw new SparkException(s"Unable to create partition path $partitionPath", e) + val partitionPath = p.storage.locationUri.map(new Path(_)).getOrElse { + ExternalCatalogUtils.generatePartitionPath(p.spec, partitionColumnNames, tablePath) + } + + try { + val fs = tablePath.getFileSystem(hadoopConfig) + if (!fs.exists(partitionPath)) { + fs.mkdirs(partitionPath) } + } catch { + case e: IOException => + throw new SparkException(s"Unable to create partition path $partitionPath", e) } - existingParts.put(p.spec, p) + + existingParts.put( + p.spec, + p.copy(storage = p.storage.copy(locationUri = Some(partitionPath.toString)))) } } @@ -392,19 +395,15 @@ class InMemoryCatalog( } } - val tableDir = new Path(catalog(db).db.locationUri, table) - val partitionColumnNames = getTable(db, table).partitionColumnNames - // TODO: we should follow hive to roll back if one partition path failed to delete. + val shouldRemovePartitionLocation = getTable(db, table).tableType == CatalogTableType.MANAGED + // TODO: we should follow hive to roll back if one partition path failed to delete, and support + // partial partition spec. partSpecs.foreach { p => - // If location is set, the partition is using an external partition location and we don't - // need to handle its directory. - if (existingParts.contains(p) && existingParts(p).storage.locationUri.isEmpty) { - val partitionPath = partitionColumnNames.flatMap { col => - p.get(col).map(col + "=" + _) - }.mkString("/") + if (existingParts.contains(p) && shouldRemovePartitionLocation) { + val partitionPath = new Path(existingParts(p).location) try { - val fs = tableDir.getFileSystem(hadoopConfig) - fs.delete(new Path(tableDir, partitionPath), true) + val fs = partitionPath.getFileSystem(hadoopConfig) + fs.delete(partitionPath, true) } catch { case e: IOException => throw new SparkException(s"Unable to delete partition path $partitionPath", e) @@ -423,33 +422,34 @@ class InMemoryCatalog( requirePartitionsExist(db, table, specs) requirePartitionsNotExist(db, table, newSpecs) - val tableDir = new Path(catalog(db).db.locationUri, table) - val partitionColumnNames = getTable(db, table).partitionColumnNames + val tableMeta = getTable(db, table) + val partitionColumnNames = tableMeta.partitionColumnNames + val tablePath = new Path(tableMeta.location) + val shouldUpdatePartitionLocation = getTable(db, table).tableType == CatalogTableType.MANAGED + val existingParts = catalog(db).tables(table).partitions // TODO: we should follow hive to roll back if one partition path failed to rename. specs.zip(newSpecs).foreach { case (oldSpec, newSpec) => - val newPart = getPartition(db, table, oldSpec).copy(spec = newSpec) - val existingParts = catalog(db).tables(table).partitions - - // If location is set, the partition is using an external partition location and we don't - // need to handle its directory. 
- if (newPart.storage.locationUri.isEmpty) { - val oldPath = partitionColumnNames.flatMap { col => - oldSpec.get(col).map(col + "=" + _) - }.mkString("/") - val newPath = partitionColumnNames.flatMap { col => - newSpec.get(col).map(col + "=" + _) - }.mkString("/") + val oldPartition = getPartition(db, table, oldSpec) + val newPartition = if (shouldUpdatePartitionLocation) { + val oldPartPath = new Path(oldPartition.location) + val newPartPath = ExternalCatalogUtils.generatePartitionPath( + newSpec, partitionColumnNames, tablePath) try { - val fs = tableDir.getFileSystem(hadoopConfig) - fs.rename(new Path(tableDir, oldPath), new Path(tableDir, newPath)) + val fs = tablePath.getFileSystem(hadoopConfig) + fs.rename(oldPartPath, newPartPath) } catch { case e: IOException => - throw new SparkException(s"Unable to rename partition path $oldPath", e) + throw new SparkException(s"Unable to rename partition path $oldPartPath", e) } + oldPartition.copy( + spec = newSpec, + storage = oldPartition.storage.copy(locationUri = Some(newPartPath.toString))) + } else { + oldPartition.copy(spec = newSpec) } existingParts.remove(oldSpec) - existingParts.put(newSpec, newPart) + existingParts.put(newSpec, newPartition) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 34748a04859ad..93c70de18ae7e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -99,6 +99,12 @@ case class CatalogTablePartition( output.filter(_.nonEmpty).mkString("CatalogPartition(\n\t", "\n\t", ")") } + /** Return the partition location, assuming it is specified. */ + def location: String = storage.locationUri.getOrElse { + val specString = spec.map { case (k, v) => s"$k=$v" }.mkString(", ") + throw new AnalysisException(s"Partition [$specString] did not specify locationUri") + } + /** * Given the partition schema, returns a row with that schema holding the partition values. */ @@ -171,6 +177,11 @@ case class CatalogTable( throw new AnalysisException(s"table $identifier did not specify database") } + /** Return the table location, assuming it is specified. */ + def location: String = storage.locationUri.getOrElse { + throw new AnalysisException(s"table $identifier did not specify locationUri") + } + /** Return the fully qualified name of this table, assuming the database was specified. 
*/ def qualifiedName: String = identifier.unquotedString diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index 34bdfc8a98710..303a8662d3f4d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.catalyst.catalog -import java.io.File -import java.net.URI - +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.scalatest.BeforeAndAfterEach import org.apache.spark.SparkFunSuite @@ -320,6 +319,33 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac catalog.createPartitions("db2", "tbl2", Seq(part1), ignoreIfExists = true) } + test("create partitions without location") { + val catalog = newBasicCatalog() + val table = CatalogTable( + identifier = TableIdentifier("tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat(None, None, None, None, false, Map.empty), + schema = new StructType() + .add("col1", "int") + .add("col2", "string") + .add("partCol1", "int") + .add("partCol2", "string"), + provider = Some("hive"), + partitionColumnNames = Seq("partCol1", "partCol2")) + catalog.createTable(table, ignoreIfExists = false) + + val partition = CatalogTablePartition(Map("partCol1" -> "1", "partCol2" -> "2"), storageFormat) + catalog.createPartitions("db1", "tbl", Seq(partition), ignoreIfExists = false) + + val partitionLocation = catalog.getPartition( + "db1", + "tbl", + Map("partCol1" -> "1", "partCol2" -> "2")).location + val tableLocation = catalog.getTable("db1", "tbl").location + val defaultPartitionLocation = new Path(new Path(tableLocation, "partCol1=1"), "partCol2=2") + assert(new Path(partitionLocation) == defaultPartitionLocation) + } + test("list partitions with partial partition spec") { val catalog = newBasicCatalog() val parts = catalog.listPartitions("db2", "tbl2", Some(Map("a" -> "1"))) @@ -399,6 +425,46 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac intercept[AnalysisException] { catalog.getPartition("db2", "tbl2", part2.spec) } } + test("rename partitions should update the location for managed table") { + val catalog = newBasicCatalog() + val table = CatalogTable( + identifier = TableIdentifier("tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat(None, None, None, None, false, Map.empty), + schema = new StructType() + .add("col1", "int") + .add("col2", "string") + .add("partCol1", "int") + .add("partCol2", "string"), + provider = Some("hive"), + partitionColumnNames = Seq("partCol1", "partCol2")) + catalog.createTable(table, ignoreIfExists = false) + + val tableLocation = catalog.getTable("db1", "tbl").location + + val mixedCasePart1 = CatalogTablePartition( + Map("partCol1" -> "1", "partCol2" -> "2"), storageFormat) + val mixedCasePart2 = CatalogTablePartition( + Map("partCol1" -> "3", "partCol2" -> "4"), storageFormat) + + catalog.createPartitions("db1", "tbl", Seq(mixedCasePart1), ignoreIfExists = false) + assert( + new Path(catalog.getPartition("db1", "tbl", mixedCasePart1.spec).location) == + new Path(new Path(tableLocation, "partCol1=1"), "partCol2=2")) + + catalog.renamePartitions("db1", "tbl", Seq(mixedCasePart1.spec), Seq(mixedCasePart2.spec)) + 
assert( + new Path(catalog.getPartition("db1", "tbl", mixedCasePart2.spec).location) == + new Path(new Path(tableLocation, "partCol1=3"), "partCol2=4")) + + // For external tables, RENAME PARTITION should not update the partition location. + val existingPartLoc = catalog.getPartition("db2", "tbl2", part1.spec).location + catalog.renamePartitions("db2", "tbl2", Seq(part1.spec), Seq(part3.spec)) + assert( + new Path(catalog.getPartition("db2", "tbl2", part3.spec).location) == + new Path(existingPartLoc)) + } + test("rename partitions when database/table does not exist") { val catalog = newBasicCatalog() intercept[AnalysisException] { @@ -419,11 +485,6 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac test("alter partitions") { val catalog = newBasicCatalog() try { - // Note: Before altering table partitions in Hive, you *must* set the current database - // to the one that contains the table of interest. Otherwise you will end up with the - // most helpful error message ever: "Unable to alter partition. alter is not possible." - // See HIVE-2742 for more detail. - catalog.setCurrentDatabase("db2") val newLocation = newUriForDatabase() val newSerde = "com.sparkbricks.text.EasySerde" val newSerdeProps = Map("spark" -> "bricks", "compressed" -> "false") @@ -571,10 +632,11 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac // -------------------------------------------------------------------------- private def exists(uri: String, children: String*): Boolean = { - val base = new File(new URI(uri)) - children.foldLeft(base) { - case (parent, child) => new File(parent, child) - }.exists() + val base = new Path(uri) + val finalPath = children.foldLeft(base) { + case (parent, child) => new Path(parent, child) + } + base.getFileSystem(new Configuration()).exists(finalPath) } test("create/drop database should create/delete the directory") { @@ -623,7 +685,6 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac test("create/drop/rename partitions should create/delete/rename the directory") { val catalog = newBasicCatalog() - val databaseDir = catalog.getDatabase("db1").locationUri val table = CatalogTable( identifier = TableIdentifier("tbl", Some("db1")), tableType = CatalogTableType.MANAGED, @@ -631,34 +692,61 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac schema = new StructType() .add("col1", "int") .add("col2", "string") - .add("a", "int") - .add("b", "string"), + .add("partCol1", "int") + .add("partCol2", "string"), provider = Some("hive"), - partitionColumnNames = Seq("a", "b") - ) + partitionColumnNames = Seq("partCol1", "partCol2")) catalog.createTable(table, ignoreIfExists = false) + val tableLocation = catalog.getTable("db1", "tbl").location + + val part1 = CatalogTablePartition(Map("partCol1" -> "1", "partCol2" -> "2"), storageFormat) + val part2 = CatalogTablePartition(Map("partCol1" -> "3", "partCol2" -> "4"), storageFormat) + val part3 = CatalogTablePartition(Map("partCol1" -> "5", "partCol2" -> "6"), storageFormat) + catalog.createPartitions("db1", "tbl", Seq(part1, part2), ignoreIfExists = false) - assert(exists(databaseDir, "tbl", "a=1", "b=2")) - assert(exists(databaseDir, "tbl", "a=3", "b=4")) + assert(exists(tableLocation, "partCol1=1", "partCol2=2")) + assert(exists(tableLocation, "partCol1=3", "partCol2=4")) catalog.renamePartitions("db1", "tbl", Seq(part1.spec), Seq(part3.spec)) - assert(!exists(databaseDir, "tbl", "a=1", "b=2")) - 
assert(exists(databaseDir, "tbl", "a=5", "b=6")) + assert(!exists(tableLocation, "partCol1=1", "partCol2=2")) + assert(exists(tableLocation, "partCol1=5", "partCol2=6")) catalog.dropPartitions("db1", "tbl", Seq(part2.spec, part3.spec), ignoreIfNotExists = false, purge = false) - assert(!exists(databaseDir, "tbl", "a=3", "b=4")) - assert(!exists(databaseDir, "tbl", "a=5", "b=6")) + assert(!exists(tableLocation, "partCol1=3", "partCol2=4")) + assert(!exists(tableLocation, "partCol1=5", "partCol2=6")) - val externalPartition = CatalogTablePartition( - Map("a" -> "7", "b" -> "8"), + val tempPath = Utils.createTempDir() + // create partition with existing directory is OK. + val partWithExistingDir = CatalogTablePartition( + Map("partCol1" -> "7", "partCol2" -> "8"), CatalogStorageFormat( - Some(Utils.createTempDir().getAbsolutePath), - None, None, None, false, Map.empty) - ) - catalog.createPartitions("db1", "tbl", Seq(externalPartition), ignoreIfExists = false) - assert(!exists(databaseDir, "tbl", "a=7", "b=8")) + Some(tempPath.getAbsolutePath), + None, None, None, false, Map.empty)) + catalog.createPartitions("db1", "tbl", Seq(partWithExistingDir), ignoreIfExists = false) + + tempPath.delete() + // create partition with non-existing directory will create that directory. + val partWithNonExistingDir = CatalogTablePartition( + Map("partCol1" -> "9", "partCol2" -> "10"), + CatalogStorageFormat( + Some(tempPath.getAbsolutePath), + None, None, None, false, Map.empty)) + catalog.createPartitions("db1", "tbl", Seq(partWithNonExistingDir), ignoreIfExists = false) + assert(tempPath.exists()) + } + + test("drop partition from external table should not delete the directory") { + val catalog = newBasicCatalog() + catalog.createPartitions("db2", "tbl1", Seq(part1), ignoreIfExists = false) + + val partPath = new Path(catalog.getPartition("db2", "tbl1", part1.spec).location) + val fs = partPath.getFileSystem(new Configuration) + assert(fs.exists(partPath)) + + catalog.dropPartitions("db2", "tbl1", Seq(part1.spec), ignoreIfNotExists = false, purge = false) + assert(fs.exists(partPath)) } } @@ -731,7 +819,7 @@ abstract class CatalogTestUtils { CatalogTable( identifier = TableIdentifier(name, database), tableType = CatalogTableType.EXTERNAL, - storage = storageFormat, + storage = storageFormat.copy(locationUri = Some(Utils.createTempDir().getAbsolutePath)), schema = new StructType() .add("col1", "int") .add("col2", "string") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index 001d9c47785d2..52385de50db6b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -527,13 +527,13 @@ class SessionCatalogSuite extends SparkFunSuite { sessionCatalog.createTable(newTable("tbl", "mydb"), ignoreIfExists = false) sessionCatalog.createPartitions( TableIdentifier("tbl", Some("mydb")), Seq(part1, part2), ignoreIfExists = false) - assert(catalogPartitionsEqual(externalCatalog, "mydb", "tbl", Seq(part1, part2))) + assert(catalogPartitionsEqual(externalCatalog.listPartitions("mydb", "tbl"), part1, part2)) // Create partitions without explicitly specifying database sessionCatalog.setCurrentDatabase("mydb") sessionCatalog.createPartitions( TableIdentifier("tbl"), Seq(partWithMixedOrder), ignoreIfExists = false) 
assert(catalogPartitionsEqual( - externalCatalog, "mydb", "tbl", Seq(part1, part2, partWithMixedOrder))) + externalCatalog.listPartitions("mydb", "tbl"), part1, part2, partWithMixedOrder)) } test("create partitions when database/table does not exist") { @@ -586,13 +586,13 @@ class SessionCatalogSuite extends SparkFunSuite { test("drop partitions") { val externalCatalog = newBasicCatalog() val sessionCatalog = new SessionCatalog(externalCatalog) - assert(catalogPartitionsEqual(externalCatalog, "db2", "tbl2", Seq(part1, part2))) + assert(catalogPartitionsEqual(externalCatalog.listPartitions("db2", "tbl2"), part1, part2)) sessionCatalog.dropPartitions( TableIdentifier("tbl2", Some("db2")), Seq(part1.spec), ignoreIfNotExists = false, purge = false) - assert(catalogPartitionsEqual(externalCatalog, "db2", "tbl2", Seq(part2))) + assert(catalogPartitionsEqual(externalCatalog.listPartitions("db2", "tbl2"), part2)) // Drop partitions without explicitly specifying database sessionCatalog.setCurrentDatabase("db2") sessionCatalog.dropPartitions( @@ -604,7 +604,7 @@ class SessionCatalogSuite extends SparkFunSuite { // Drop multiple partitions at once sessionCatalog.createPartitions( TableIdentifier("tbl2", Some("db2")), Seq(part1, part2), ignoreIfExists = false) - assert(catalogPartitionsEqual(externalCatalog, "db2", "tbl2", Seq(part1, part2))) + assert(catalogPartitionsEqual(externalCatalog.listPartitions("db2", "tbl2"), part1, part2)) sessionCatalog.dropPartitions( TableIdentifier("tbl2", Some("db2")), Seq(part1.spec, part2.spec), @@ -844,10 +844,11 @@ class SessionCatalogSuite extends SparkFunSuite { test("list partitions") { val catalog = new SessionCatalog(newBasicCatalog()) - assert(catalog.listPartitions(TableIdentifier("tbl2", Some("db2"))).toSet == Set(part1, part2)) + assert(catalogPartitionsEqual( + catalog.listPartitions(TableIdentifier("tbl2", Some("db2"))), part1, part2)) // List partitions without explicitly specifying database catalog.setCurrentDatabase("db2") - assert(catalog.listPartitions(TableIdentifier("tbl2")).toSet == Set(part1, part2)) + assert(catalogPartitionsEqual(catalog.listPartitions(TableIdentifier("tbl2")), part1, part2)) } test("list partitions when database/table does not exist") { @@ -860,6 +861,15 @@ class SessionCatalogSuite extends SparkFunSuite { } } + private def catalogPartitionsEqual( + actualParts: Seq[CatalogTablePartition], + expectedParts: CatalogTablePartition*): Boolean = { + // ExternalCatalog may set a default location for partitions, here we ignore the partition + // location when comparing them. 
+ actualParts.map(p => p.copy(storage = p.storage.copy(locationUri = None))).toSet == + expectedParts.map(p => p.copy(storage = p.storage.copy(locationUri = None))).toSet + } + // -------------------------------------------------------------------------- // Functions // -------------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 8500ab460a1b6..84a63fdb9f36f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, JobConf} import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.Resolver -import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTablePartition, CatalogTableType, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.datasources.{CaseInsensitiveMap, PartitioningUtils} @@ -500,7 +500,7 @@ case class AlterTableRecoverPartitionsCommand( s"location provided: $tableIdentWithDB") } - val root = new Path(table.storage.locationUri.get) + val root = new Path(table.location) logInfo(s"Recover all the partitions in $root") val fs = root.getFileSystem(spark.sparkContext.hadoopConfiguration) @@ -558,9 +558,9 @@ case class AlterTableRecoverPartitionsCommand( val name = st.getPath.getName if (st.isDirectory && name.contains("=")) { val ps = name.split("=", 2) - val columnName = PartitioningUtils.unescapePathName(ps(0)) + val columnName = ExternalCatalogUtils.unescapePathName(ps(0)) // TODO: Validate the value - val value = PartitioningUtils.unescapePathName(ps(1)) + val value = ExternalCatalogUtils.unescapePathName(ps(1)) if (resolver(columnName, partitionNames.head)) { scanPartitions(spark, fs, filter, st.getPath, spec ++ Map(partitionNames.head -> value), partitionNames.drop(1), threshold, resolver) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index e49a1f5acd0c9..119e732d0202c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -710,7 +710,8 @@ case class ShowPartitionsCommand( private def getPartName(spec: TablePartitionSpec, partColNames: Seq[String]): String = { partColNames.map { name => - PartitioningUtils.escapePathName(name) + "=" + PartitioningUtils.escapePathName(spec(name)) + ExternalCatalogUtils.escapePathName(name) + "=" + + ExternalCatalogUtils.escapePathName(spec(name)) }.mkString(File.separator) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala index 443a2ec033a98..4ad91dcceb432 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala @@ -67,7 +67,7 @@ class 
CatalogFileIndex( val selectedPartitions = sparkSession.sessionState.catalog.listPartitionsByFilter( table.identifier, filters) val partitions = selectedPartitions.map { p => - val path = new Path(p.storage.locationUri.get) + val path = new Path(p.location) val fs = path.getFileSystem(hadoopConf) PartitionPath( p.toRow(partitionSchema), path.makeQualified(fs.getUri, fs.getWorkingDirectory)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 2d43a6ad098ed..739aeac877b99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -190,7 +190,7 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { val effectiveOutputPath = if (overwritingSinglePartition) { val partition = t.sparkSession.sessionState.catalog.getPartition( l.catalogTable.get.identifier, overwrite.specificPartition.get) - new Path(partition.storage.locationUri.get) + new Path(partition.location) } else { outputPath } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index e404dcd5452b9..0f8ed9e23fe3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -32,7 +32,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage import org.apache.spark.sql.{Dataset, SparkSession} -import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, ExternalCatalogUtils} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning @@ -281,11 +281,11 @@ object FileFormatWriter extends Logging { private def partitionStringExpression: Seq[Expression] = { description.partitionColumns.zipWithIndex.flatMap { case (c, i) => val escaped = ScalaUDF( - PartitioningUtils.escapePathName _, + ExternalCatalogUtils.escapePathName _, StringType, Seq(Cast(c, StringType)), Seq(StringType)) - val str = If(IsNull(c), Literal(PartitioningUtils.DEFAULT_PARTITION_NAME), escaped) + val str = If(IsNull(c), Literal(ExternalCatalogUtils.DEFAULT_PARTITION_NAME), escaped) val partitionName = Literal(c.name + "=") :: str :: Nil if (i == 0) partitionName else Literal(Path.SEPARATOR) :: partitionName } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index a8a722dd3c620..3740caa22c37e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -128,7 +128,6 @@ abstract class PartitioningAwareFileIndex( case Some(userProvidedSchema) if userProvidedSchema.nonEmpty => val spec = PartitioningUtils.parsePartitions( leafDirs, - 
PartitioningUtils.DEFAULT_PARTITION_NAME, typeInference = false, basePaths = basePaths) @@ -148,7 +147,6 @@ abstract class PartitioningAwareFileIndex( case _ => PartitioningUtils.parsePartitions( leafDirs, - PartitioningUtils.DEFAULT_PARTITION_NAME, typeInference = sparkSession.sessionState.conf.partitionColumnTypeInferenceEnabled, basePaths = basePaths) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index b51b41869bf06..a28b04ca3fb5a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -25,7 +25,6 @@ import scala.collection.mutable.ArrayBuffer import scala.util.Try import org.apache.hadoop.fs.Path -import org.apache.hadoop.util.Shell import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow @@ -56,15 +55,15 @@ object PartitionSpec { } object PartitioningUtils { - // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since sql/core doesn't - // depend on Hive. - val DEFAULT_PARTITION_NAME = "__HIVE_DEFAULT_PARTITION__" private[datasources] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { require(columnNames.size == literals.size) } + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.DEFAULT_PARTITION_NAME + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.unescapePathName + /** * Given a group of qualified paths, tries to parse them and returns a partition specification. * For example, given: @@ -90,12 +89,11 @@ object PartitioningUtils { */ private[datasources] def parsePartitions( paths: Seq[Path], - defaultPartitionName: String, typeInference: Boolean, basePaths: Set[Path]): PartitionSpec = { // First, we need to parse every partition's path and see if we can find partition values. val (partitionValues, optDiscoveredBasePaths) = paths.map { path => - parsePartition(path, defaultPartitionName, typeInference, basePaths) + parsePartition(path, typeInference, basePaths) }.unzip // We create pairs of (path -> path's partition value) here @@ -173,7 +171,6 @@ object PartitioningUtils { */ private[datasources] def parsePartition( path: Path, - defaultPartitionName: String, typeInference: Boolean, basePaths: Set[Path]): (Option[PartitionValues], Option[Path]) = { val columns = ArrayBuffer.empty[(String, Literal)] @@ -196,7 +193,7 @@ object PartitioningUtils { // Let's say currentPath is a path of "/table/a=1/", currentPath.getName will give us a=1. // Once we get the string, we try to parse it and find the partition column and value. val maybeColumn = - parsePartitionColumn(currentPath.getName, defaultPartitionName, typeInference) + parsePartitionColumn(currentPath.getName, typeInference) maybeColumn.foreach(columns += _) // Now, we determine if we should stop. 
@@ -228,7 +225,6 @@ object PartitioningUtils { private def parsePartitionColumn( columnSpec: String, - defaultPartitionName: String, typeInference: Boolean): Option[(String, Literal)] = { val equalSignIndex = columnSpec.indexOf('=') if (equalSignIndex == -1) { @@ -240,7 +236,7 @@ object PartitioningUtils { val rawColumnValue = columnSpec.drop(equalSignIndex + 1) assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") - val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName, typeInference) + val literal = inferPartitionColumnValue(rawColumnValue, typeInference) Some(columnName -> literal) } } @@ -355,7 +351,6 @@ object PartitioningUtils { */ private[datasources] def inferPartitionColumnValue( raw: String, - defaultPartitionName: String, typeInference: Boolean): Literal = { val decimalTry = Try { // `BigDecimal` conversion can fail when the `field` is not a form of number. @@ -380,14 +375,14 @@ object PartitioningUtils { .orElse(Try(Literal(JTimestamp.valueOf(unescapePathName(raw))))) // Then falls back to string .getOrElse { - if (raw == defaultPartitionName) { + if (raw == DEFAULT_PARTITION_NAME) { Literal.create(null, NullType) } else { Literal.create(unescapePathName(raw), StringType) } } } else { - if (raw == defaultPartitionName) { + if (raw == DEFAULT_PARTITION_NAME) { Literal.create(null, NullType) } else { Literal.create(unescapePathName(raw), StringType) @@ -450,77 +445,4 @@ object PartitioningUtils { Literal.create(Cast(l, desiredType).eval(), desiredType) } } - - ////////////////////////////////////////////////////////////////////////////////////////////////// - // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). - ////////////////////////////////////////////////////////////////////////////////////////////////// - - val charToEscape = { - val bitSet = new java.util.BitSet(128) - - /** - * ASCII 01-1F are HTTP control characters that need to be escaped. - * \u000A and \u000D are \n and \r, respectively. 
- */ - val clist = Array( - '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', - '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', - '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', - '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', - '{', '[', ']', '^') - - clist.foreach(bitSet.set(_)) - - if (Shell.WINDOWS) { - Array(' ', '<', '>', '|').foreach(bitSet.set(_)) - } - - bitSet - } - - def needsEscaping(c: Char): Boolean = { - c >= 0 && c < charToEscape.size() && charToEscape.get(c) - } - - def escapePathName(path: String): String = { - val builder = new StringBuilder() - path.foreach { c => - if (needsEscaping(c)) { - builder.append('%') - builder.append(f"${c.asInstanceOf[Int]}%02X") - } else { - builder.append(c) - } - } - - builder.toString() - } - - def unescapePathName(path: String): String = { - val sb = new StringBuilder - var i = 0 - - while (i < path.length) { - val c = path.charAt(i) - if (c == '%' && i + 2 < path.length) { - val code: Int = try { - Integer.parseInt(path.substring(i + 1, i + 3), 16) - } catch { - case _: Exception => -1 - } - if (code >= 0) { - sb.append(code.asInstanceOf[Char]) - i += 3 - } else { - sb.append(c) - i += 1 - } - } else { - sb.append(c) - i += 1 - } - } - - sb.toString() - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index df3a3c34c39a0..363715c6d2249 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -875,7 +875,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1)) val part2 = Map("a" -> "2", "b" -> "6") - val root = new Path(catalog.getTableMetadata(tableIdent).storage.locationUri.get) + val root = new Path(catalog.getTableMetadata(tableIdent).location) val fs = root.getFileSystem(spark.sparkContext.hadoopConfiguration) // valid fs.mkdirs(new Path(new Path(root, "a=1"), "b=5")) @@ -1133,7 +1133,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } assert(catalog.getTableMetadata(tableIdent).storage.locationUri.isDefined) assert(catalog.getTableMetadata(tableIdent).storage.properties.isEmpty) - assert(catalog.getPartition(tableIdent, partSpec).storage.locationUri.isEmpty) + assert(catalog.getPartition(tableIdent, partSpec).storage.locationUri.isDefined) assert(catalog.getPartition(tableIdent, partSpec).storage.properties.isEmpty) // Verify that the location is set to the expected string def verifyLocation(expected: String, spec: Option[TablePartitionSpec] = None): Unit = { @@ -1296,9 +1296,9 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { sql("ALTER TABLE dbx.tab1 ADD IF NOT EXISTS " + "PARTITION (a='2', b='6') LOCATION 'paris' PARTITION (a='3', b='7')") assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) - assert(catalog.getPartition(tableIdent, part1).storage.locationUri.isEmpty) + assert(catalog.getPartition(tableIdent, part1).storage.locationUri.isDefined) assert(catalog.getPartition(tableIdent, part2).storage.locationUri == Option("paris")) - assert(catalog.getPartition(tableIdent, part3).storage.locationUri.isEmpty) + 
assert(catalog.getPartition(tableIdent, part3).storage.locationUri.isDefined) // add partitions without explicitly specifying database catalog.setCurrentDatabase("dbx") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 120a3a2ef33aa..22e35a1bc0b1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -29,6 +29,7 @@ import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.{PartitionPath => Partition} @@ -48,11 +49,11 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha import PartitioningUtils._ import testImplicits._ - val defaultPartitionName = "__HIVE_DEFAULT_PARTITION__" + val defaultPartitionName = ExternalCatalogUtils.DEFAULT_PARTITION_NAME test("column type inference") { def check(raw: String, literal: Literal): Unit = { - assert(inferPartitionColumnValue(raw, defaultPartitionName, true) === literal) + assert(inferPartitionColumnValue(raw, true) === literal) } check("10", Literal.create(10, IntegerType)) @@ -76,7 +77,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha "hdfs://host:9000/path/a=10.5/b=hello") var exception = intercept[AssertionError] { - parsePartitions(paths.map(new Path(_)), defaultPartitionName, true, Set.empty[Path]) + parsePartitions(paths.map(new Path(_)), true, Set.empty[Path]) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -88,7 +89,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha parsePartitions( paths.map(new Path(_)), - defaultPartitionName, true, Set(new Path("hdfs://host:9000/path/"))) @@ -101,7 +101,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha parsePartitions( paths.map(new Path(_)), - defaultPartitionName, true, Set(new Path("hdfs://host:9000/path/something=true/table"))) @@ -114,7 +113,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha parsePartitions( paths.map(new Path(_)), - defaultPartitionName, true, Set(new Path("hdfs://host:9000/path/table=true"))) @@ -127,7 +125,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha exception = intercept[AssertionError] { parsePartitions( paths.map(new Path(_)), - defaultPartitionName, true, Set(new Path("hdfs://host:9000/path/"))) } @@ -147,7 +144,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha exception = intercept[AssertionError] { parsePartitions( paths.map(new Path(_)), - defaultPartitionName, true, Set(new Path("hdfs://host:9000/tmp/tables/"))) } @@ -156,13 +152,13 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partition") { def check(path: String, expected: Option[PartitionValues]): Unit = { - val actual = parsePartition(new Path(path), defaultPartitionName, true, Set.empty[Path])._1 + val actual = 
parsePartition(new Path(path), true, Set.empty[Path])._1 assert(expected === actual) } def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = { val message = intercept[T] { - parsePartition(new Path(path), defaultPartitionName, true, Set.empty[Path]) + parsePartition(new Path(path), true, Set.empty[Path]) }.getMessage assert(message.contains(expected)) @@ -204,7 +200,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha // when the basePaths is the same as the path to a leaf directory val partitionSpec1: Option[PartitionValues] = parsePartition( path = new Path("file://path/a=10"), - defaultPartitionName = defaultPartitionName, typeInference = true, basePaths = Set(new Path("file://path/a=10")))._1 @@ -213,7 +208,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha // when the basePaths is the path to a base directory of leaf directories val partitionSpec2: Option[PartitionValues] = parsePartition( path = new Path("file://path/a=10"), - defaultPartitionName = defaultPartitionName, typeInference = true, basePaths = Set(new Path("file://path")))._1 @@ -231,7 +225,6 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha val actualSpec = parsePartitions( paths.map(new Path(_)), - defaultPartitionName, true, rootPaths) assert(actualSpec === spec) @@ -314,7 +307,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partitions with type inference disabled") { def check(paths: Seq[String], spec: PartitionSpec): Unit = { val actualSpec = - parsePartitions(paths.map(new Path(_)), defaultPartitionName, false, Set.empty[Path]) + parsePartitions(paths.map(new Path(_)), false, Set.empty[Path]) assert(actualSpec === spec) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index b537061d0d221..42ce1a88a2b67 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive +import java.io.IOException import java.util import scala.util.control.NonFatal @@ -26,7 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.ql.metadata.HiveException import org.apache.thrift.TException -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier @@ -255,7 +256,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // compatible format, which means the data source is file-based and must have a `path`. 
require(tableDefinition.storage.locationUri.isDefined, "External file-based data source table must have a `path` entry in storage properties.") - Some(new Path(tableDefinition.storage.locationUri.get).toUri.toString) + Some(new Path(tableDefinition.location).toUri.toString) } else { None } @@ -789,7 +790,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat parts: Seq[CatalogTablePartition], ignoreIfExists: Boolean): Unit = withClient { requireTableExists(db, table) - val lowerCasedParts = parts.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) + + val tableMeta = getTable(db, table) + val partitionColumnNames = tableMeta.partitionColumnNames + val tablePath = new Path(tableMeta.location) + val partsWithLocation = parts.map { p => + // Ideally we can leave the partition location empty and let Hive metastore to set it. + // However, Hive metastore is not case preserving and will generate wrong partition location + // with lower cased partition column names. Here we set the default partition location + // manually to avoid this problem. + val partitionPath = p.storage.locationUri.map(new Path(_)).getOrElse { + ExternalCatalogUtils.generatePartitionPath(p.spec, partitionColumnNames, tablePath) + } + p.copy(storage = p.storage.copy(locationUri = Some(partitionPath.toString))) + } + val lowerCasedParts = partsWithLocation.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) client.createPartitions(db, table, lowerCasedParts, ignoreIfExists) } @@ -810,6 +825,31 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat newSpecs: Seq[TablePartitionSpec]): Unit = withClient { client.renamePartitions( db, table, specs.map(lowerCasePartitionSpec), newSpecs.map(lowerCasePartitionSpec)) + + val tableMeta = getTable(db, table) + val partitionColumnNames = tableMeta.partitionColumnNames + // Hive metastore is not case preserving and keeps partition columns with lower cased names. + // When Hive rename partition for managed tables, it will create the partition location with + // a default path generate by the new spec with lower cased partition column names. This is + // unexpected and we need to rename them manually and alter the partition location. + val hasUpperCasePartitionColumn = partitionColumnNames.exists(col => col.toLowerCase != col) + if (tableMeta.tableType == MANAGED && hasUpperCasePartitionColumn) { + val tablePath = new Path(tableMeta.location) + val newParts = newSpecs.map { spec => + val partition = client.getPartition(db, table, lowerCasePartitionSpec(spec)) + val wrongPath = new Path(partition.location) + val rightPath = ExternalCatalogUtils.generatePartitionPath( + spec, partitionColumnNames, tablePath) + try { + tablePath.getFileSystem(hadoopConf).rename(wrongPath, rightPath) + } catch { + case e: IOException => throw new SparkException( + s"Unable to rename partition path from $wrongPath to $rightPath", e) + } + partition.copy(storage = partition.storage.copy(locationUri = Some(rightPath.toString))) + } + alterPartitions(db, table, newParts) + } } override def alterPartitions( @@ -817,6 +857,11 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table: String, newParts: Seq[CatalogTablePartition]): Unit = withClient { val lowerCasedParts = newParts.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) + // Note: Before altering table partitions in Hive, you *must* set the current database + // to the one that contains the table of interest. 
Otherwise you will end up with the + // most helpful error message ever: "Unable to alter partition. alter is not possible." + // See HIVE-2742 for more detail. + client.setCurrentDatabase(db) client.alterPartitions(db, table, lowerCasedParts) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index d3873cf6c8231..fbd705172cae6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -445,7 +445,7 @@ object SetWarehouseLocationTest extends Logging { catalog.getTableMetadata(TableIdentifier("testLocation", Some("default"))) val expectedLocation = "file:" + expectedWarehouseLocation.toString + "/testlocation" - val actualLocation = tableMetadata.storage.locationUri.get + val actualLocation = tableMetadata.location if (actualLocation != expectedLocation) { throw new Exception( s"Expected table location is $expectedLocation. But, it is actually $actualLocation") @@ -461,7 +461,7 @@ object SetWarehouseLocationTest extends Logging { catalog.getTableMetadata(TableIdentifier("testLocation", Some("testLocationDB"))) val expectedLocation = "file:" + expectedWarehouseLocation.toString + "/testlocationdb.db/testlocation" - val actualLocation = tableMetadata.storage.locationUri.get + val actualLocation = tableMetadata.location if (actualLocation != expectedLocation) { throw new Exception( s"Expected table location is $expectedLocation. But, it is actually $actualLocation") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala index cfc1d81d544eb..9f4401ae22560 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala @@ -29,7 +29,7 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle val expectedPath = spark.sharedState.externalCatalog.getDatabase(dbName).locationUri + "/" + tableName - assert(metastoreTable.storage.locationUri.get === expectedPath) + assert(metastoreTable.location === expectedPath) } private def getTableNames(dbName: Option[String] = None): Array[String] = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 0076a778683ca..6efae13ddf69d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -425,7 +425,7 @@ class HiveDDLSuite sql("CREATE TABLE tab1 (height INT, length INT) PARTITIONED BY (a INT, b INT)") val part1 = Map("a" -> "1", "b" -> "5") val part2 = Map("a" -> "2", "b" -> "6") - val root = new Path(catalog.getTableMetadata(tableIdent).storage.locationUri.get) + val root = new Path(catalog.getTableMetadata(tableIdent).location) val fs = root.getFileSystem(spark.sparkContext.hadoopConfiguration) // valid fs.mkdirs(new Path(new Path(root, "a=1"), "b=5")) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index c21db3595fa19..e607af67f93e5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala 
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -542,7 +542,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } userSpecifiedLocation match { case Some(location) => - assert(r.catalogTable.storage.locationUri.get === location) + assert(r.catalogTable.location === location) case None => // OK. } // Also make sure that the format and serde are as desired. From 064d4315f246450043a52882fcf59e95d79701e8 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Thu, 10 Nov 2016 17:00:43 -0800 Subject: [PATCH 0086/1204] [SPARK-18185] Fix all forms of INSERT / OVERWRITE TABLE for Datasource tables ## What changes were proposed in this pull request? As of current 2.1, INSERT OVERWRITE with dynamic partitions against a Datasource table will overwrite the entire table instead of only the partitions matching the static keys, as in Hive. It also doesn't respect custom partition locations. This PR adds support for all these operations to Datasource tables managed by the Hive metastore. It is implemented as follows - During planning time, the full set of partitions affected by an INSERT or OVERWRITE command is read from the Hive metastore. - The planner identifies any partitions with custom locations and includes this in the write task metadata. - FileFormatWriter tasks refer to this custom locations map when determining where to write for dynamic partition output. - When the write job finishes, the set of written partitions is compared against the initial set of matched partitions, and the Hive metastore is updated to reflect the newly added / removed partitions. It was necessary to introduce a method for staging files with absolute output paths to `FileCommitProtocol`. These files are not handled by the Hadoop output committer but are moved to their final locations when the job commits. The overwrite behavior of legacy Datasource tables is also changed: no longer will the entire table be overwritten if a partial partition spec is present. cc cloud-fan yhuai ## How was this patch tested? Unit tests, existing tests. Author: Eric Liang Author: Wenchen Fan Closes #15814 from ericl/sc-5027. 
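The reconciliation step described above — comparing the partitions actually written by the job against those that initially matched the static partition keys, then adding or dropping metastore entries accordingly — can be summarized by the minimal sketch below. The names used here (`PartitionReconcileSketch`, `reconcile`, `addPartitions`, `dropPartitions`) are illustrative only and not part of this change; in the patch itself the equivalent logic lives in the `refreshPartitionsCallback` added to `DataSourceAnalysis` further down.

```scala
object PartitionReconcileSketch {
  // A partition spec is just a map of partition column name -> value.
  type SimpleSpec = Map[String, String]

  def reconcile(
      initialMatching: Set[SimpleSpec], // matched the static keys at planning time
      written: Set[SimpleSpec],         // reported back by the finished write tasks
      overwrite: Boolean)(
      addPartitions: Seq[SimpleSpec] => Unit,   // e.g. ALTER TABLE ... ADD PARTITION
      dropPartitions: Seq[SimpleSpec] => Unit   // e.g. ALTER TABLE ... DROP PARTITION
  ): Unit = {
    // Partitions written for the first time must be registered in the metastore.
    val added = written -- initialMatching
    if (added.nonEmpty) addPartitions(added.toSeq)

    // Under INSERT OVERWRITE, partitions that matched the static keys but received
    // no new data were effectively replaced by nothing, so they are dropped.
    if (overwrite) {
      val removed = initialMatching -- written
      if (removed.nonEmpty) dropPartitions(removed.toSeq)
    }
  }
}
```

Working purely on partition specs keeps the comparison independent of file layout, so the same reconciliation applies whether the written partitions came from static keys or from dynamic partition columns.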
(cherry picked from commit a3356343cbf58b930326f45721fb4ecade6f8029) Signed-off-by: Reynold Xin --- .../internal/io/FileCommitProtocol.scala | 15 ++ .../io/HadoopMapReduceCommitProtocol.scala | 63 ++++++- .../sql/catalyst/parser/AstBuilder.scala | 12 +- .../plans/logical/basicLogicalOperators.scala | 10 +- .../sql/catalyst/parser/PlanParserSuite.scala | 4 +- .../execution/datasources/DataSource.scala | 20 ++- .../datasources/DataSourceStrategy.scala | 94 +++++++--- .../datasources/FileFormatWriter.scala | 26 ++- .../InsertIntoHadoopFsRelationCommand.scala | 61 ++++++- .../datasources/PartitioningUtils.scala | 10 ++ .../execution/streaming/FileStreamSink.scala | 2 +- .../ManifestFileCommitProtocol.scala | 6 + .../PartitionProviderCompatibilitySuite.scala | 161 +++++++++++++++++- 13 files changed, 411 insertions(+), 73 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala index fb8020585cf89..afd2250c93a8a 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala @@ -82,9 +82,24 @@ abstract class FileCommitProtocol { * * The "dir" parameter specifies 2, and "ext" parameter specifies both 4 and 5, and the rest * are left to the commit protocol implementation to decide. + * + * Important: it is the caller's responsibility to add uniquely identifying content to "ext" + * if a task is going to write out multiple files to the same dir. The file commit protocol only + * guarantees that files written by different tasks will not conflict. */ def newTaskTempFile(taskContext: TaskAttemptContext, dir: Option[String], ext: String): String + /** + * Similar to newTaskTempFile(), but allows files to committed to an absolute output location. + * Depending on the implementation, there may be weaker guarantees around adding files this way. + * + * Important: it is the caller's responsibility to add uniquely identifying content to "ext" + * if a task is going to write out multiple files to the same dir. The file commit protocol only + * guarantees that files written by different tasks will not conflict. + */ + def newTaskTempFileAbsPath( + taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String + /** * Commits a task after the writes succeed. Must be called on the executors when running tasks. */ diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 66ccb6d437708..c99b75e52325e 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -17,7 +17,9 @@ package org.apache.spark.internal.io -import java.util.Date +import java.util.{Date, UUID} + +import scala.collection.mutable import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ @@ -42,17 +44,26 @@ class HadoopMapReduceCommitProtocol(jobId: String, path: String) /** OutputCommitter from Hadoop is not serializable so marking it transient. */ @transient private var committer: OutputCommitter = _ + /** + * Tracks files staged by this task for absolute output paths. These outputs are not managed by + * the Hadoop OutputCommitter, so we must move these to their final locations on job commit. 
+ * + * The mapping is from the temp output path to the final desired output path of the file. + */ + @transient private var addedAbsPathFiles: mutable.Map[String, String] = null + + /** + * The staging directory for all files committed with absolute output paths. + */ + private def absPathStagingDir: Path = new Path(path, "_temporary-" + jobId) + protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { context.getOutputFormatClass.newInstance().getOutputCommitter(context) } override def newTaskTempFile( taskContext: TaskAttemptContext, dir: Option[String], ext: String): String = { - // The file name looks like part-r-00000-2dd664f9-d2c4-4ffe-878f-c6c70c1fb0cb_00003.gz.parquet - // Note that %05d does not truncate the split number, so if we have more than 100000 tasks, - // the file name is fine and won't overflow. - val split = taskContext.getTaskAttemptID.getTaskID.getId - val filename = f"part-$split%05d-$jobId$ext" + val filename = getFilename(taskContext, ext) val stagingDir: String = committer match { // For FileOutputCommitter it has its own staging path called "work path". @@ -67,6 +78,28 @@ class HadoopMapReduceCommitProtocol(jobId: String, path: String) } } + override def newTaskTempFileAbsPath( + taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String = { + val filename = getFilename(taskContext, ext) + val absOutputPath = new Path(absoluteDir, filename).toString + + // Include a UUID here to prevent file collisions for one task writing to different dirs. + // In principle we could include hash(absoluteDir) instead but this is simpler. + val tmpOutputPath = new Path( + absPathStagingDir, UUID.randomUUID().toString() + "-" + filename).toString + + addedAbsPathFiles(tmpOutputPath) = absOutputPath + tmpOutputPath + } + + private def getFilename(taskContext: TaskAttemptContext, ext: String): String = { + // The file name looks like part-r-00000-2dd664f9-d2c4-4ffe-878f-c6c70c1fb0cb_00003.gz.parquet + // Note that %05d does not truncate the split number, so if we have more than 100000 tasks, + // the file name is fine and won't overflow. 
+ val split = taskContext.getTaskAttemptID.getTaskID.getId + f"part-$split%05d-$jobId$ext" + } + override def setupJob(jobContext: JobContext): Unit = { // Setup IDs val jobId = SparkHadoopWriter.createJobID(new Date, 0) @@ -87,25 +120,41 @@ class HadoopMapReduceCommitProtocol(jobId: String, path: String) override def commitJob(jobContext: JobContext, taskCommits: Seq[TaskCommitMessage]): Unit = { committer.commitJob(jobContext) + val filesToMove = taskCommits.map(_.obj.asInstanceOf[Map[String, String]]) + .foldLeft(Map[String, String]())(_ ++ _) + logDebug(s"Committing files staged for absolute locations $filesToMove") + val fs = absPathStagingDir.getFileSystem(jobContext.getConfiguration) + for ((src, dst) <- filesToMove) { + fs.rename(new Path(src), new Path(dst)) + } + fs.delete(absPathStagingDir, true) } override def abortJob(jobContext: JobContext): Unit = { committer.abortJob(jobContext, JobStatus.State.FAILED) + val fs = absPathStagingDir.getFileSystem(jobContext.getConfiguration) + fs.delete(absPathStagingDir, true) } override def setupTask(taskContext: TaskAttemptContext): Unit = { committer = setupCommitter(taskContext) committer.setupTask(taskContext) + addedAbsPathFiles = mutable.Map[String, String]() } override def commitTask(taskContext: TaskAttemptContext): TaskCommitMessage = { val attemptId = taskContext.getTaskAttemptID SparkHadoopMapRedUtil.commitTask( committer, taskContext, attemptId.getJobID.getId, attemptId.getTaskID.getId) - EmptyTaskCommitMessage + new TaskCommitMessage(addedAbsPathFiles.toMap) } override def abortTask(taskContext: TaskAttemptContext): Unit = { committer.abortTask(taskContext) + // best effort cleanup of other staged files + for ((src, _) <- addedAbsPathFiles) { + val tmp = new Path(src) + tmp.getFileSystem(taskContext.getConfiguration).delete(tmp, false) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4b151c81d8f8b..2006844923cf7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -172,24 +172,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { val tableIdent = visitTableIdentifier(ctx.tableIdentifier) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) - val dynamicPartitionKeys = partitionKeys.filter(_._2.isEmpty) + val dynamicPartitionKeys: Map[String, Option[String]] = partitionKeys.filter(_._2.isEmpty) if (ctx.EXISTS != null && dynamicPartitionKeys.nonEmpty) { throw new ParseException(s"Dynamic partitions do not support IF NOT EXISTS. 
Specified " + "partitions with value: " + dynamicPartitionKeys.keys.mkString("[", ",", "]"), ctx) } val overwrite = ctx.OVERWRITE != null - val overwritePartition = - if (overwrite && partitionKeys.nonEmpty && dynamicPartitionKeys.isEmpty) { - Some(partitionKeys.map(t => (t._1, t._2.get))) - } else { - None - } + val staticPartitionKeys: Map[String, String] = + partitionKeys.filter(_._2.nonEmpty).map(t => (t._1, t._2.get)) InsertIntoTable( UnresolvedRelation(tableIdent, None), partitionKeys, query, - OverwriteOptions(overwrite, overwritePartition), + OverwriteOptions(overwrite, if (overwrite) staticPartitionKeys else Map.empty), ctx.EXISTS != null) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 65ceab2ce27b1..574caf039d3d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -350,13 +350,15 @@ case class BroadcastHint(child: LogicalPlan) extends UnaryNode { * Options for writing new data into a table. * * @param enabled whether to overwrite existing data in the table. - * @param specificPartition only data in the specified partition will be overwritten. + * @param staticPartitionKeys if non-empty, specifies that we only want to overwrite partitions + * that match this partial partition spec. If empty, all partitions + * will be overwritten. */ case class OverwriteOptions( enabled: Boolean, - specificPartition: Option[CatalogTypes.TablePartitionSpec] = None) { - if (specificPartition.isDefined) { - assert(enabled, "Overwrite must be enabled when specifying a partition to overwrite.") + staticPartitionKeys: CatalogTypes.TablePartitionSpec = Map.empty) { + if (staticPartitionKeys.nonEmpty) { + assert(enabled, "Overwrite must be enabled when specifying specific partitions.") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 7400f3430e99c..e5f1f7b3bd4cf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -185,9 +185,9 @@ class PlanParserSuite extends PlanTest { OverwriteOptions( overwrite, if (overwrite && partition.nonEmpty) { - Some(partition.map(kv => (kv._1, kv._2.get))) + partition.map(kv => (kv._1, kv._2.get)) } else { - None + Map.empty }), ifNotExists) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 5d663949df6b5..65422f1495f03 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -417,15 +417,17 @@ case class DataSource( // will be adjusted within InsertIntoHadoopFsRelation. val plan = InsertIntoHadoopFsRelationCommand( - outputPath, - columns, - bucketSpec, - format, - _ => Unit, // No existing table needs to be refreshed. 
- options, - data.logicalPlan, - mode, - catalogTable) + outputPath = outputPath, + staticPartitionKeys = Map.empty, + customPartitionLocations = Map.empty, + partitionColumns = columns, + bucketSpec = bucketSpec, + fileFormat = format, + refreshFunction = _ => Unit, // No existing table needs to be refreshed. + options = options, + query = data.logicalPlan, + mode = mode, + catalogTable = catalogTable) sparkSession.sessionState.executePlan(plan).toRdd // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring it. copy(userSpecifiedSchema = Some(data.schema.asNullable)).resolveRelation() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 739aeac877b99..4f19a2d00b0e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -24,10 +24,10 @@ import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.{CatalystConf, CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.{CatalystConf, CatalystTypeConverters, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SimpleCatalogRelation} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition, SimpleCatalogRelation} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Union} import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, UnknownPartitioning} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} -import org.apache.spark.sql.execution.command.{AlterTableAddPartitionCommand, DDLUtils, ExecutedCommandExec} +import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -182,41 +182,53 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { "Cannot overwrite a path that is also being read from.") } - val overwritingSinglePartition = - overwrite.specificPartition.isDefined && + val partitionSchema = query.resolve( + t.partitionSchema, t.sparkSession.sessionState.analyzer.resolver) + val partitionsTrackedByCatalog = t.sparkSession.sessionState.conf.manageFilesourcePartitions && + l.catalogTable.isDefined && l.catalogTable.get.partitionColumnNames.nonEmpty && l.catalogTable.get.tracksPartitionsInCatalog - val effectiveOutputPath = if (overwritingSinglePartition) { - val partition = t.sparkSession.sessionState.catalog.getPartition( - l.catalogTable.get.identifier, overwrite.specificPartition.get) - new Path(partition.location) - } else { - outputPath - } - - val effectivePartitionSchema = if (overwritingSinglePartition) { - Nil - } else { - query.resolve(t.partitionSchema, t.sparkSession.sessionState.analyzer.resolver) + var initialMatchingPartitions: Seq[TablePartitionSpec] = Nil 
+ var customPartitionLocations: Map[TablePartitionSpec, String] = Map.empty + + // When partitions are tracked by the catalog, compute all custom partition locations that + // may be relevant to the insertion job. + if (partitionsTrackedByCatalog) { + val matchingPartitions = t.sparkSession.sessionState.catalog.listPartitions( + l.catalogTable.get.identifier, Some(overwrite.staticPartitionKeys)) + initialMatchingPartitions = matchingPartitions.map(_.spec) + customPartitionLocations = getCustomPartitionLocations( + t.sparkSession, l.catalogTable.get, outputPath, matchingPartitions) } + // Callback for updating metastore partition metadata after the insertion job completes. + // TODO(ekl) consider moving this into InsertIntoHadoopFsRelationCommand def refreshPartitionsCallback(updatedPartitions: Seq[TablePartitionSpec]): Unit = { - if (l.catalogTable.isDefined && updatedPartitions.nonEmpty && - l.catalogTable.get.partitionColumnNames.nonEmpty && - l.catalogTable.get.tracksPartitionsInCatalog) { - val metastoreUpdater = AlterTableAddPartitionCommand( - l.catalogTable.get.identifier, - updatedPartitions.map(p => (p, None)), - ifNotExists = true) - metastoreUpdater.run(t.sparkSession) + if (partitionsTrackedByCatalog) { + val newPartitions = updatedPartitions.toSet -- initialMatchingPartitions + if (newPartitions.nonEmpty) { + AlterTableAddPartitionCommand( + l.catalogTable.get.identifier, newPartitions.toSeq.map(p => (p, None)), + ifNotExists = true).run(t.sparkSession) + } + if (overwrite.enabled) { + val deletedPartitions = initialMatchingPartitions.toSet -- updatedPartitions + if (deletedPartitions.nonEmpty) { + AlterTableDropPartitionCommand( + l.catalogTable.get.identifier, deletedPartitions.toSeq, + ifExists = true, purge = true).run(t.sparkSession) + } + } } t.location.refresh() } val insertCmd = InsertIntoHadoopFsRelationCommand( - effectiveOutputPath, - effectivePartitionSchema, + outputPath, + if (overwrite.enabled) overwrite.staticPartitionKeys else Map.empty, + customPartitionLocations, + partitionSchema, t.bucketSpec, t.fileFormat, refreshPartitionsCallback, @@ -227,6 +239,34 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { insertCmd } + + /** + * Given a set of input partitions, returns those that have locations that differ from the + * Hive default (e.g. /k1=v1/k2=v2). These partitions were manually assigned locations by + * the user. 
+ * + * @return a mapping from partition specs to their custom locations + */ + private def getCustomPartitionLocations( + spark: SparkSession, + table: CatalogTable, + basePath: Path, + partitions: Seq[CatalogTablePartition]): Map[TablePartitionSpec, String] = { + val hadoopConf = spark.sessionState.newHadoopConf + val fs = basePath.getFileSystem(hadoopConf) + val qualifiedBasePath = basePath.makeQualified(fs.getUri, fs.getWorkingDirectory) + partitions.flatMap { p => + val defaultLocation = qualifiedBasePath.suffix( + "/" + PartitioningUtils.getPathFragment(p.spec, table.partitionSchema)).toString + val catalogLocation = new Path(p.location).makeQualified( + fs.getUri, fs.getWorkingDirectory).toString + if (catalogLocation != defaultLocation) { + Some(p.spec -> catalogLocation) + } else { + None + } + }.toMap + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 0f8ed9e23fe3b..edcce103d0963 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -47,6 +47,10 @@ import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter /** A helper object for writing FileFormat data out to a location. */ object FileFormatWriter extends Logging { + /** Describes how output files should be placed in the filesystem. */ + case class OutputSpec( + outputPath: String, customPartitionLocations: Map[TablePartitionSpec, String]) + /** A shared job description for all the write tasks. */ private class WriteJobDescription( val uuid: String, // prevent collision between different (appending) write jobs @@ -56,7 +60,8 @@ object FileFormatWriter extends Logging { val partitionColumns: Seq[Attribute], val nonPartitionColumns: Seq[Attribute], val bucketSpec: Option[BucketSpec], - val path: String) + val path: String, + val customPartitionLocations: Map[TablePartitionSpec, String]) extends Serializable { assert(AttributeSet(allColumns) == AttributeSet(partitionColumns ++ nonPartitionColumns), @@ -83,7 +88,7 @@ object FileFormatWriter extends Logging { plan: LogicalPlan, fileFormat: FileFormat, committer: FileCommitProtocol, - outputPath: String, + outputSpec: OutputSpec, hadoopConf: Configuration, partitionColumns: Seq[Attribute], bucketSpec: Option[BucketSpec], @@ -93,7 +98,7 @@ object FileFormatWriter extends Logging { val job = Job.getInstance(hadoopConf) job.setOutputKeyClass(classOf[Void]) job.setOutputValueClass(classOf[InternalRow]) - FileOutputFormat.setOutputPath(job, new Path(outputPath)) + FileOutputFormat.setOutputPath(job, new Path(outputSpec.outputPath)) val partitionSet = AttributeSet(partitionColumns) val dataColumns = plan.output.filterNot(partitionSet.contains) @@ -111,7 +116,8 @@ object FileFormatWriter extends Logging { partitionColumns = partitionColumns, nonPartitionColumns = dataColumns, bucketSpec = bucketSpec, - path = outputPath) + path = outputSpec.outputPath, + customPartitionLocations = outputSpec.customPartitionLocations) SQLExecution.withNewExecutionId(sparkSession, queryExecution) { // This call shouldn't be put into the `try` block below because it only initializes and @@ -308,7 +314,17 @@ object FileFormatWriter extends Logging { } val ext = bucketId + description.outputWriterFactory.getFileExtension(taskAttemptContext) - val path = committer.newTaskTempFile(taskAttemptContext, 
partDir, ext) + val customPath = partDir match { + case Some(dir) => + description.customPartitionLocations.get(PartitioningUtils.parsePathFragment(dir)) + case _ => + None + } + val path = if (customPath.isDefined) { + committer.newTaskTempFileAbsPath(taskAttemptContext, customPath.get, ext) + } else { + committer.newTaskTempFile(taskAttemptContext, partDir, ext) + } val newWriter = description.outputWriterFactory.newInstance( path = path, dataSchema = description.nonPartitionColumns.toStructType, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index a0a8cb5024c33..28975e1546e79 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources import java.io.IOException -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql._ @@ -32,19 +32,32 @@ import org.apache.spark.sql.execution.command.RunnableCommand /** * A command for writing data to a [[HadoopFsRelation]]. Supports both overwriting and appending. * Writing to dynamic partitions is also supported. + * + * @param staticPartitionKeys partial partitioning spec for write. This defines the scope of + * partition overwrites: when the spec is empty, all partitions are + * overwritten. When it covers a prefix of the partition keys, only + * partitions matching the prefix are overwritten. + * @param customPartitionLocations mapping of partition specs to their custom locations. The + * caller should guarantee that exactly those table partitions + * falling under the specified static partition keys are contained + * in this map, and that no other partitions are. 
*/ case class InsertIntoHadoopFsRelationCommand( outputPath: Path, + staticPartitionKeys: TablePartitionSpec, + customPartitionLocations: Map[TablePartitionSpec, String], partitionColumns: Seq[Attribute], bucketSpec: Option[BucketSpec], fileFormat: FileFormat, - refreshFunction: (Seq[TablePartitionSpec]) => Unit, + refreshFunction: Seq[TablePartitionSpec] => Unit, options: Map[String, String], @transient query: LogicalPlan, mode: SaveMode, catalogTable: Option[CatalogTable]) extends RunnableCommand { + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName + override protected def innerChildren: Seq[LogicalPlan] = query :: Nil override def run(sparkSession: SparkSession): Seq[Row] = { @@ -66,10 +79,7 @@ case class InsertIntoHadoopFsRelationCommand( case (SaveMode.ErrorIfExists, true) => throw new AnalysisException(s"path $qualifiedOutputPath already exists.") case (SaveMode.Overwrite, true) => - if (!fs.delete(qualifiedOutputPath, true /* recursively */)) { - throw new IOException(s"Unable to clear output " + - s"directory $qualifiedOutputPath prior to writing to it") - } + deleteMatchingPartitions(fs, qualifiedOutputPath) true case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => true @@ -93,7 +103,8 @@ case class InsertIntoHadoopFsRelationCommand( plan = query, fileFormat = fileFormat, committer = committer, - outputPath = qualifiedOutputPath.toString, + outputSpec = FileFormatWriter.OutputSpec( + qualifiedOutputPath.toString, customPartitionLocations), hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = bucketSpec, @@ -105,4 +116,40 @@ case class InsertIntoHadoopFsRelationCommand( Seq.empty[Row] } + + /** + * Deletes all partition files that match the specified static prefix. Partitions with custom + * locations are also cleared based on the custom locations map given to this class. + */ + private def deleteMatchingPartitions(fs: FileSystem, qualifiedOutputPath: Path): Unit = { + val staticPartitionPrefix = if (staticPartitionKeys.nonEmpty) { + "/" + partitionColumns.flatMap { p => + staticPartitionKeys.get(p.name) match { + case Some(value) => + Some(escapePathName(p.name) + "=" + escapePathName(value)) + case None => + None + } + }.mkString("/") + } else { + "" + } + // first clear the path determined by the static partition keys (e.g. /table/foo=1) + val staticPrefixPath = qualifiedOutputPath.suffix(staticPartitionPrefix) + if (fs.exists(staticPrefixPath) && !fs.delete(staticPrefixPath, true /* recursively */)) { + throw new IOException(s"Unable to clear output " + + s"directory $staticPrefixPath prior to writing to it") + } + // now clear all custom partition locations (e.g. 
/custom/dir/where/foo=2/bar=4) + for ((spec, customLoc) <- customPartitionLocations) { + assert( + (staticPartitionKeys.toSet -- spec).isEmpty, + "Custom partition location did not match static partitioning keys") + val path = new Path(customLoc) + if (fs.exists(path) && !fs.delete(path, true)) { + throw new IOException(s"Unable to clear partition " + + s"directory $path prior to writing to it") + } + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index a28b04ca3fb5a..bf9f318780ec2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -62,6 +62,7 @@ object PartitioningUtils { } import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.DEFAULT_PARTITION_NAME + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.unescapePathName /** @@ -252,6 +253,15 @@ object PartitioningUtils { }.toMap } + /** + * This is the inverse of parsePathFragment(). + */ + def getPathFragment(spec: TablePartitionSpec, partitionSchema: StructType): String = { + partitionSchema.map { field => + escapePathName(field.name) + "=" + escapePathName(spec(field.name)) + }.mkString("/") + } + /** * Normalize the column names in partition specification, w.r.t. the real partition column names * and case sensitivity. e.g., if the partition spec has a column named `monTh`, and there is a diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala index e849cafef4184..f1c5f9ab5067d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala @@ -80,7 +80,7 @@ class FileStreamSink( plan = data.logicalPlan, fileFormat = fileFormat, committer = committer, - outputPath = path, + outputSpec = FileFormatWriter.OutputSpec(path, Map.empty), hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = None, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala index 1fe13fa1623fc..92191c8b64b72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala @@ -96,6 +96,12 @@ class ManifestFileCommitProtocol(jobId: String, path: String) file } + override def newTaskTempFileAbsPath( + taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String = { + throw new UnsupportedOperationException( + s"$this does not support adding files with an absolute path") + } + override def commitTask(taskContext: TaskAttemptContext): TaskCommitMessage = { if (addedFiles.nonEmpty) { val fs = new Path(addedFiles.head).getFileSystem(taskContext.getConfiguration) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala index 
ac435bf6195b0..a1aa07456fd36 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.util.Utils class PartitionProviderCompatibilitySuite extends QueryTest with TestHiveSingleton with SQLTestUtils { @@ -135,7 +136,7 @@ class PartitionProviderCompatibilitySuite } } - test("insert overwrite partition of legacy datasource table overwrites entire table") { + test("insert overwrite partition of legacy datasource table") { withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { withTable("test") { withTempDir { dir => @@ -144,9 +145,9 @@ class PartitionProviderCompatibilitySuite """insert overwrite table test |partition (partCol=1) |select * from range(100)""".stripMargin) - assert(spark.sql("select * from test").count() == 100) + assert(spark.sql("select * from test").count() == 104) - // Dynamic partitions case + // Overwriting entire table spark.sql("insert overwrite table test select id, id from range(10)".stripMargin) assert(spark.sql("select * from test").count() == 10) } @@ -186,4 +187,158 @@ class PartitionProviderCompatibilitySuite } } } + + /** + * Runs a test against a multi-level partitioned table, then validates that the custom locations + * were respected by the output writer. + * + * The initial partitioning structure is: + * /P1=0/P2=0 -- custom location a + * /P1=0/P2=1 -- custom location b + * /P1=1/P2=0 -- custom location c + * /P1=1/P2=1 -- default location + */ + private def testCustomLocations(testFn: => Unit): Unit = { + val base = Utils.createTempDir(namePrefix = "base") + val a = Utils.createTempDir(namePrefix = "a") + val b = Utils.createTempDir(namePrefix = "b") + val c = Utils.createTempDir(namePrefix = "c") + try { + spark.sql(s""" + |create table test (id long, P1 int, P2 int) + |using parquet + |options (path "${base.getAbsolutePath}") + |partitioned by (P1, P2)""".stripMargin) + spark.sql(s"alter table test add partition (P1=0, P2=0) location '${a.getAbsolutePath}'") + spark.sql(s"alter table test add partition (P1=0, P2=1) location '${b.getAbsolutePath}'") + spark.sql(s"alter table test add partition (P1=1, P2=0) location '${c.getAbsolutePath}'") + spark.sql(s"alter table test add partition (P1=1, P2=1)") + + testFn + + // Now validate the partition custom locations were respected + val initialCount = spark.sql("select * from test").count() + val numA = spark.sql("select * from test where P1=0 and P2=0").count() + val numB = spark.sql("select * from test where P1=0 and P2=1").count() + val numC = spark.sql("select * from test where P1=1 and P2=0").count() + Utils.deleteRecursively(a) + spark.sql("refresh table test") + assert(spark.sql("select * from test where P1=0 and P2=0").count() == 0) + assert(spark.sql("select * from test").count() == initialCount - numA) + Utils.deleteRecursively(b) + spark.sql("refresh table test") + assert(spark.sql("select * from test where P1=0 and P2=1").count() == 0) + assert(spark.sql("select * from test").count() == initialCount - numA - numB) + Utils.deleteRecursively(c) + spark.sql("refresh table test") + assert(spark.sql("select * from test where P1=1 and P2=0").count() == 0) + assert(spark.sql("select * from test").count() 
== initialCount - numA - numB - numC) + } finally { + Utils.deleteRecursively(base) + Utils.deleteRecursively(a) + Utils.deleteRecursively(b) + Utils.deleteRecursively(c) + spark.sql("drop table test") + } + } + + test("sanity check table setup") { + testCustomLocations { + assert(spark.sql("select * from test").count() == 0) + assert(spark.sql("show partitions test").count() == 4) + } + } + + test("insert into partial dynamic partitions") { + testCustomLocations { + spark.sql("insert into test partition (P1=0, P2) select id, id from range(10)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 12) + spark.sql("insert into test partition (P1=0, P2) select id, id from range(10)") + assert(spark.sql("select * from test").count() == 20) + assert(spark.sql("show partitions test").count() == 12) + spark.sql("insert into test partition (P1=1, P2) select id, id from range(10)") + assert(spark.sql("select * from test").count() == 30) + assert(spark.sql("show partitions test").count() == 20) + spark.sql("insert into test partition (P1=2, P2) select id, id from range(10)") + assert(spark.sql("select * from test").count() == 40) + assert(spark.sql("show partitions test").count() == 30) + } + } + + test("insert into fully dynamic partitions") { + testCustomLocations { + spark.sql("insert into test partition (P1, P2) select id, id, id from range(10)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 12) + spark.sql("insert into test partition (P1, P2) select id, id, id from range(10)") + assert(spark.sql("select * from test").count() == 20) + assert(spark.sql("show partitions test").count() == 12) + } + } + + test("insert into static partition") { + testCustomLocations { + spark.sql("insert into test partition (P1=0, P2=0) select id from range(10)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 4) + spark.sql("insert into test partition (P1=0, P2=0) select id from range(10)") + assert(spark.sql("select * from test").count() == 20) + assert(spark.sql("show partitions test").count() == 4) + spark.sql("insert into test partition (P1=1, P2=1) select id from range(10)") + assert(spark.sql("select * from test").count() == 30) + assert(spark.sql("show partitions test").count() == 4) + } + } + + test("overwrite partial dynamic partitions") { + testCustomLocations { + spark.sql("insert overwrite table test partition (P1=0, P2) select id, id from range(10)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 12) + spark.sql("insert overwrite table test partition (P1=0, P2) select id, id from range(5)") + assert(spark.sql("select * from test").count() == 5) + assert(spark.sql("show partitions test").count() == 7) + spark.sql("insert overwrite table test partition (P1=0, P2) select id, id from range(1)") + assert(spark.sql("select * from test").count() == 1) + assert(spark.sql("show partitions test").count() == 3) + spark.sql("insert overwrite table test partition (P1=1, P2) select id, id from range(10)") + assert(spark.sql("select * from test").count() == 11) + assert(spark.sql("show partitions test").count() == 11) + spark.sql("insert overwrite table test partition (P1=1, P2) select id, id from range(1)") + assert(spark.sql("select * from test").count() == 2) + assert(spark.sql("show partitions test").count() == 2) + spark.sql("insert overwrite table test partition (P1=3, 
P2) select id, id from range(100)") + assert(spark.sql("select * from test").count() == 102) + assert(spark.sql("show partitions test").count() == 102) + } + } + + test("overwrite fully dynamic partitions") { + testCustomLocations { + spark.sql("insert overwrite table test partition (P1, P2) select id, id, id from range(10)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 10) + spark.sql("insert overwrite table test partition (P1, P2) select id, id, id from range(5)") + assert(spark.sql("select * from test").count() == 5) + assert(spark.sql("show partitions test").count() == 5) + } + } + + test("overwrite static partition") { + testCustomLocations { + spark.sql("insert overwrite table test partition (P1=0, P2=0) select id from range(10)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 4) + spark.sql("insert overwrite table test partition (P1=0, P2=0) select id from range(5)") + assert(spark.sql("select * from test").count() == 5) + assert(spark.sql("show partitions test").count() == 4) + spark.sql("insert overwrite table test partition (P1=1, P2=1) select id from range(5)") + assert(spark.sql("select * from test").count() == 10) + assert(spark.sql("show partitions test").count() == 4) + spark.sql("insert overwrite table test partition (P1=1, P2=2) select id from range(5)") + assert(spark.sql("select * from test").count() == 15) + assert(spark.sql("show partitions test").count() == 5) + } + } } From 51dca6143670ec1c1cb090047c3941becaf41fa9 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Thu, 10 Nov 2016 17:13:10 -0800 Subject: [PATCH 0087/1204] [SPARK-18401][SPARKR][ML] SparkR random forest should support output original label. ## What changes were proposed in this pull request? SparkR ```spark.randomForest``` classification prediction should output original label rather than the indexed label. This issue is very similar with [SPARK-18291](https://issues.apache.org/jira/browse/SPARK-18291). ## How was this patch tested? Add unit tests. Author: Yanbo Liang Closes #15842 from yanboliang/spark-18401. 
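As a hedged illustration of the approach taken in the wrapper change below (hypothetical column names; a `data` DataFrame is assumed to be in scope), the classifier's prediction can be kept in an internal index column and mapped back to the original string labels with `IndexToString` before it is handed back to the R user:

```scala
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.feature.{IndexToString, RFormula}

// Assumed inputs: `data` has Species/Petal_Length/Petal_Width columns; `labels` stands in
// for the original label strings (the wrapper reads them from the fitted RFormula schema).
val labels = Array("setosa", "versicolor", "virginica")

val rFormula = new RFormula()
  .setFormula("Species ~ Petal_Length + Petal_Width")
  .setForceIndexLabel(true)            // always index the label, even a numeric one

val rfc = new RandomForestClassifier()
  .setFeaturesCol(rFormula.getFeaturesCol)
  .setPredictionCol("pred_label_idx")  // internal, index-valued prediction column

val idxToStr = new IndexToString()
  .setInputCol("pred_label_idx")
  .setOutputCol("prediction")          // user-facing column carrying the original labels
  .setLabels(labels)

val model = new Pipeline().setStages(Array(rFormula, rfc, idxToStr)).fit(data)
model.transform(data).drop("pred_label_idx")
```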
(cherry picked from commit 5ddf69470b93c0b8a28bb4ac905e7670d9c50a95) Signed-off-by: Yanbo Liang --- R/pkg/inst/tests/testthat/test_mllib.R | 24 ++++++++++++++++ .../r/RandomForestClassificationWrapper.scala | 28 ++++++++++++++++--- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 1e456ef5c6b16..33e85b78de4fe 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -935,6 +935,10 @@ test_that("spark.randomForest Classification", { expect_equal(stats$numTrees, 20) expect_error(capture.output(stats), NA) expect_true(length(capture.output(stats)) > 6) + # Test string prediction values + predictions <- collect(predict(model, data))$prediction + expect_equal(length(grep("setosa", predictions)), 50) + expect_equal(length(grep("versicolor", predictions)), 50) modelPath <- tempfile(pattern = "spark-randomForestClassification", fileext = ".tmp") write.ml(model, modelPath) @@ -947,6 +951,26 @@ test_that("spark.randomForest Classification", { expect_equal(stats$numClasses, stats2$numClasses) unlink(modelPath) + + # Test numeric response variable + labelToIndex <- function(species) { + switch(as.character(species), + setosa = 0.0, + versicolor = 1.0, + virginica = 2.0 + ) + } + iris$NumericSpecies <- lapply(iris$Species, labelToIndex) + data <- suppressWarnings(createDataFrame(iris[-5])) + model <- spark.randomForest(data, NumericSpecies ~ Petal_Length + Petal_Width, "classification", + maxDepth = 5, maxBins = 16) + stats <- summary(model) + expect_equal(stats$numFeatures, 2) + expect_equal(stats$numTrees, 20) + # Test numeric prediction values + predictions <- collect(predict(model, data))$prediction + expect_equal(length(grep("1.0", predictions)), 50) + expect_equal(length(grep("2.0", predictions)), 50) }) test_that("spark.gbt", { diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala index 6947ba7e7597a..31f846dc6cfec 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala @@ -23,9 +23,9 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.ml.{Pipeline, PipelineModel} -import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier} -import org.apache.spark.ml.feature.RFormula +import org.apache.spark.ml.feature.{IndexToString, RFormula} import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset} @@ -35,6 +35,8 @@ private[r] class RandomForestClassifierWrapper private ( val formula: String, val features: Array[String]) extends MLWritable { + import RandomForestClassifierWrapper._ + private val rfcModel: RandomForestClassificationModel = pipeline.stages(1).asInstanceOf[RandomForestClassificationModel] @@ -46,7 +48,9 @@ private[r] class RandomForestClassifierWrapper private ( def summary: String = rfcModel.toDebugString def transform(dataset: Dataset[_]): DataFrame = { - pipeline.transform(dataset).drop(rfcModel.getFeaturesCol) + pipeline.transform(dataset) + .drop(PREDICTED_LABEL_INDEX_COL) + .drop(rfcModel.getFeaturesCol) } override def 
write: MLWriter = new @@ -54,6 +58,10 @@ private[r] class RandomForestClassifierWrapper private ( } private[r] object RandomForestClassifierWrapper extends MLReadable[RandomForestClassifierWrapper] { + + val PREDICTED_LABEL_INDEX_COL = "pred_label_idx" + val PREDICTED_LABEL_COL = "prediction" + def fit( // scalastyle:ignore data: DataFrame, formula: String, @@ -73,6 +81,7 @@ private[r] object RandomForestClassifierWrapper extends MLReadable[RandomForestC val rFormula = new RFormula() .setFormula(formula) + .setForceIndexLabel(true) RWrapperUtils.checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) @@ -82,6 +91,11 @@ private[r] object RandomForestClassifierWrapper extends MLReadable[RandomForestC .attributes.get val features = featureAttrs.map(_.name.get) + // get label names from output schema + val labelAttr = Attribute.fromStructField(schema(rFormulaModel.getLabelCol)) + .asInstanceOf[NominalAttribute] + val labels = labelAttr.values.get + // assemble and fit the pipeline val rfc = new RandomForestClassifier() .setMaxDepth(maxDepth) @@ -97,10 +111,16 @@ private[r] object RandomForestClassifierWrapper extends MLReadable[RandomForestC .setCacheNodeIds(cacheNodeIds) .setProbabilityCol(probabilityCol) .setFeaturesCol(rFormula.getFeaturesCol) + .setPredictionCol(PREDICTED_LABEL_INDEX_COL) if (seed != null && seed.length > 0) rfc.setSeed(seed.toLong) + val idxToStr = new IndexToString() + .setInputCol(PREDICTED_LABEL_INDEX_COL) + .setOutputCol(PREDICTED_LABEL_COL) + .setLabels(labels) + val pipeline = new Pipeline() - .setStages(Array(rFormulaModel, rfc)) + .setStages(Array(rFormulaModel, rfc, idxToStr)) .fit(data) new RandomForestClassifierWrapper(pipeline, formula, features) From 00c9c7d96489778dfe38a36675d3162bf8844880 Mon Sep 17 00:00:00 2001 From: Vinayak Date: Fri, 11 Nov 2016 12:54:16 -0600 Subject: [PATCH 0088/1204] [SPARK-17843][WEB UI] Indicate event logs pending for processing on history server UI ## What changes were proposed in this pull request? History Server UI's application listing to display information on currently under process event logs so a user knows that pending this processing an application may not list on the UI. When there are no event logs under process, the application list page has a "Last Updated" date-time at the top indicating the date-time of the last _completed_ scan of the event logs. The value is displayed to the user in his/her local time zone. ## How was this patch tested? All unit tests pass. Particularly all the suites under org.apache.spark.deploy.history.\* were run to test changes. - Very first startup - Pending logs - no logs processed yet: screen shot 2016-10-24 at 3 07 04 pm - Very first startup - Pending logs - some logs processed: screen shot 2016-10-24 at 3 18 42 pm - Last updated - No currently pending logs: screen shot 2016-10-17 at 8 34 37 pm - Last updated - With some currently pending logs: screen shot 2016-10-24 at 3 09 31 pm - No applications found and No currently pending logs: screen shot 2016-10-24 at 3 24 26 pm Author: Vinayak Closes #15410 from vijoshi/SAAS-608_master. 
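A minimal sketch of the bookkeeping idea behind this change (illustrative names, not the exact fields the patch adds): count event logs whose replay is still pending and record the completion time of the last scan with atomics, so the UI layer can poll both values cheaply from another thread:

```scala
import java.util.concurrent.atomic.{AtomicInteger, AtomicLong}

class PendingLogBookkeeping {
  private val pendingReplayTasksCount = new AtomicInteger(0)
  private val lastScanTime = new AtomicLong(-1L)

  def replaySubmitted(numTasks: Int): Unit = pendingReplayTasksCount.addAndGet(numTasks)
  def replayFinished(): Unit = pendingReplayTasksCount.decrementAndGet()
  def scanCompleted(timeMs: Long): Unit = lastScanTime.set(timeMs)

  // Values the history page reads to render the "pending" banner and "Last updated" stamp.
  def eventLogsUnderProcess: Int = pendingReplayTasksCount.get()
  def lastUpdatedTime: Long = lastScanTime.get()
}
```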
(cherry picked from commit a531fe1a82ec515314f2db2e2305283fef24067f) Signed-off-by: Tom Graves --- .../spark/ui/static/historypage-common.js | 24 ++++++++ .../history/ApplicationHistoryProvider.scala | 24 ++++++++ .../deploy/history/FsHistoryProvider.scala | 59 +++++++++++++------ .../spark/deploy/history/HistoryPage.scala | 19 ++++++ .../spark/deploy/history/HistoryServer.scala | 8 +++ 5 files changed, 116 insertions(+), 18 deletions(-) create mode 100644 core/src/main/resources/org/apache/spark/ui/static/historypage-common.js diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage-common.js b/core/src/main/resources/org/apache/spark/ui/static/historypage-common.js new file mode 100644 index 0000000000000..55d540d8317a0 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage-common.js @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +$(document).ready(function() { + if ($('#last-updated').length) { + var lastUpdatedMillis = Number($('#last-updated').text()); + var updatedDate = new Date(lastUpdatedMillis); + $('#last-updated').text(updatedDate.toLocaleDateString()+", "+updatedDate.toLocaleTimeString()) + } +}); diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 06530ff836466..d7d82800b8b55 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -74,6 +74,30 @@ private[history] case class LoadedAppUI( private[history] abstract class ApplicationHistoryProvider { + /** + * Returns the count of application event logs that the provider is currently still processing. + * History Server UI can use this to indicate to a user that the application listing on the UI + * can be expected to list additional known applications once the processing of these + * application event logs completes. + * + * A History Provider that does not have a notion of count of event logs that may be pending + * for processing need not override this method. + * + * @return Count of application event logs that are currently under process + */ + def getEventLogsUnderProcess(): Int = { + return 0; + } + + /** + * Returns the time the history provider last updated the application history information + * + * @return 0 if this is undefined or unsupported, otherwise the last updated time in millis + */ + def getLastUpdatedTime(): Long = { + return 0; + } + /** * Returns a list of applications available for the history server to show. 
* diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index dfc1aad64c818..ca38a47639422 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.history import java.io.{FileNotFoundException, IOException, OutputStream} import java.util.UUID -import java.util.concurrent.{Executors, ExecutorService, TimeUnit} +import java.util.concurrent.{Executors, ExecutorService, Future, TimeUnit} import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.mutable @@ -108,7 +108,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // The modification time of the newest log detected during the last scan. Currently only // used for logging msgs (logs are re-scanned based on file size, rather than modtime) - private var lastScanTime = -1L + private val lastScanTime = new java.util.concurrent.atomic.AtomicLong(-1) // Mapping of application IDs to their metadata, in descending end time order. Apps are inserted // into the map in order, so the LinkedHashMap maintains the correct ordering. @@ -120,6 +120,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // List of application logs to be deleted by event log cleaner. private var attemptsToClean = new mutable.ListBuffer[FsApplicationAttemptInfo] + private val pendingReplayTasksCount = new java.util.concurrent.atomic.AtomicInteger(0) + /** * Return a runnable that performs the given operation on the event logs. * This operation is expected to be executed periodically. @@ -226,6 +228,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) applications.get(appId) } + override def getEventLogsUnderProcess(): Int = pendingReplayTasksCount.get() + + override def getLastUpdatedTime(): Long = lastScanTime.get() + override def getAppUI(appId: String, attemptId: Option[String]): Option[LoadedAppUI] = { try { applications.get(appId).flatMap { appInfo => @@ -329,26 +335,43 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) if (logInfos.nonEmpty) { logDebug(s"New/updated attempts found: ${logInfos.size} ${logInfos.map(_.getPath)}") } - logInfos.map { file => - replayExecutor.submit(new Runnable { + + var tasks = mutable.ListBuffer[Future[_]]() + + try { + for (file <- logInfos) { + tasks += replayExecutor.submit(new Runnable { override def run(): Unit = mergeApplicationListing(file) }) } - .foreach { task => - try { - // Wait for all tasks to finish. This makes sure that checkForLogs - // is not scheduled again while some tasks are already running in - // the replayExecutor. - task.get() - } catch { - case e: InterruptedException => - throw e - case e: Exception => - logError("Exception while merging application listings", e) - } + } catch { + // let the iteration over logInfos break, since an exception on + // replayExecutor.submit (..) indicates the ExecutorService is unable + // to take any more submissions at this time + + case e: Exception => + logError(s"Exception while submitting event log for replay", e) + } + + pendingReplayTasksCount.addAndGet(tasks.size) + + tasks.foreach { task => + try { + // Wait for all tasks to finish. This makes sure that checkForLogs + // is not scheduled again while some tasks are already running in + // the replayExecutor. 
+ task.get() + } catch { + case e: InterruptedException => + throw e + case e: Exception => + logError("Exception while merging application listings", e) + } finally { + pendingReplayTasksCount.decrementAndGet() } + } - lastScanTime = newLastScanTime + lastScanTime.set(newLastScanTime) } catch { case e: Exception => logError("Exception in checking for event log updates", e) } @@ -365,7 +388,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } catch { case e: Exception => logError("Exception encountered when attempting to update last scan time", e) - lastScanTime + lastScanTime.get() } finally { if (!fs.delete(path, true)) { logWarning(s"Error deleting ${path}") diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 96b9ecf43b14c..0e7a6c24d4fa5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -30,13 +30,30 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") Option(request.getParameter("showIncomplete")).getOrElse("false").toBoolean val allAppsSize = parent.getApplicationList().count(_.completed != requestedIncomplete) + val eventLogsUnderProcessCount = parent.getEventLogsUnderProcess() + val lastUpdatedTime = parent.getLastUpdatedTime() val providerConfig = parent.getProviderConfig() val content = +
      <script src={UIUtils.prependBaseUri("/static/historypage-common.js")}></script> ++
      <div>
          <div class="span12">
            <ul class="unstyled">
              {providerConfig.map { case (k, v) => <li><strong>{k}:</strong> {v}</li> }}
            </ul>
+            {
+            if (eventLogsUnderProcessCount > 0) {
+              <p>There are {eventLogsUnderProcessCount} event log(s) currently being
+                processed which may result in additional applications getting listed on this page.
+                Refresh the page to view updates.
+              </p>
+            }
+            }
+
+            {
+            if (lastUpdatedTime > 0) {
+              <p>Last updated: <span id="last-updated">{lastUpdatedTime}</span></p>
+            }
+            }
+
            {
            if (allAppsSize > 0) {
              <script src={UIUtils.prependBaseUri("/static/dataTables.rowsGroup.js")}></script> ++
@@ -46,6 +63,8 @@
            } else if (requestedIncomplete) {
              <h4>No incomplete applications found!</h4>
+            } else if (eventLogsUnderProcessCount > 0) {
+              <h4>No completed applications found!</h4>
            } else {
              <h4>No completed applications found!</h4>
++ parent.emptyListingHtml } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 3175b36b3e56f..7e21fa681aa1e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -179,6 +179,14 @@ class HistoryServer( provider.getListing() } + def getEventLogsUnderProcess(): Int = { + provider.getEventLogsUnderProcess() + } + + def getLastUpdatedTime(): Long = { + provider.getLastUpdatedTime() + } + def getApplicationInfoList: Iterator[ApplicationInfo] = { getApplicationList().map(ApplicationsListResource.appHistoryInfoToPublicAppInfo) } From 465e4b40b3b7760bfcd0f03a14b805029ed599f1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Nov 2016 13:28:18 -0800 Subject: [PATCH 0089/1204] [SPARK-17982][SQL] SQLBuilder should wrap the generated SQL with parenthesis for LIMIT ## What changes were proposed in this pull request? Currently, `SQLBuilder` handles `LIMIT` by always adding `LIMIT` at the end of the generated subSQL. It makes `RuntimeException`s like the following. This PR adds a parenthesis always except `SubqueryAlias` is used together with `LIMIT`. **Before** ``` scala scala> sql("CREATE TABLE tbl(id INT)") scala> sql("CREATE VIEW v1(id2) AS SELECT id FROM tbl LIMIT 2") java.lang.RuntimeException: Failed to analyze the canonicalized SQL: ... ``` **After** ``` scala scala> sql("CREATE TABLE tbl(id INT)") scala> sql("CREATE VIEW v1(id2) AS SELECT id FROM tbl LIMIT 2") scala> sql("SELECT id2 FROM v1") res4: org.apache.spark.sql.DataFrame = [id2: int] ``` **Fixed cases in this PR** The following two cases are the detail query plans having problematic SQL generations. 1. `SELECT * FROM (SELECT id FROM tbl LIMIT 2)` Please note that **FROM SELECT** part of the generated SQL in the below. When we don't use '()' for limit, this fails. ```scala # Original logical plan: Project [id#1] +- GlobalLimit 2 +- LocalLimit 2 +- Project [id#1] +- MetastoreRelation default, tbl # Canonicalized logical plan: Project [gen_attr_0#1 AS id#4] +- SubqueryAlias tbl +- Project [gen_attr_0#1] +- GlobalLimit 2 +- LocalLimit 2 +- Project [gen_attr_0#1] +- SubqueryAlias gen_subquery_0 +- Project [id#1 AS gen_attr_0#1] +- SQLTable default, tbl, [id#1] # Generated SQL: SELECT `gen_attr_0` AS `id` FROM (SELECT `gen_attr_0` FROM SELECT `gen_attr_0` FROM (SELECT `id` AS `gen_attr_0` FROM `default`.`tbl`) AS gen_subquery_0 LIMIT 2) AS tbl ``` 2. `SELECT * FROM (SELECT id FROM tbl TABLESAMPLE (2 ROWS))` Please note that **((~~~) AS gen_subquery_0 LIMIT 2)** in the below. When we use '()' for limit on `SubqueryAlias`, this fails. ```scala # Original logical plan: Project [id#1] +- Project [id#1] +- GlobalLimit 2 +- LocalLimit 2 +- MetastoreRelation default, tbl # Canonicalized logical plan: Project [gen_attr_0#1 AS id#4] +- SubqueryAlias tbl +- Project [gen_attr_0#1] +- GlobalLimit 2 +- LocalLimit 2 +- SubqueryAlias gen_subquery_0 +- Project [id#1 AS gen_attr_0#1] +- SQLTable default, tbl, [id#1] # Generated SQL: SELECT `gen_attr_0` AS `id` FROM (SELECT `gen_attr_0` FROM ((SELECT `id` AS `gen_attr_0` FROM `default`.`tbl`) AS gen_subquery_0 LIMIT 2)) AS tbl ``` ## How was this patch tested? Pass the Jenkins test with a newly added test case. Author: Dongjoon Hyun Closes #15546 from dongjoon-hyun/SPARK-17982. 
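A rough, string-based sketch of the rule described above (the real change pattern-matches on the Catalyst plan, as the diff below shows; the helper name here is hypothetical):

```scala
// Wrap "<child> LIMIT n" in parentheses unless the child is already a named
// subquery (SubqueryAlias), which the TABLESAMPLE case depends on.
def limitSQL(childSQL: String, childIsSubqueryAlias: Boolean, limitExpr: String): String =
  if (childIsSubqueryAlias) s"$childSQL LIMIT $limitExpr"
  else s"($childSQL LIMIT $limitExpr)"
```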
(cherry picked from commit d42bb7cc4e32c173769bd7da5b9b5eafb510860c) Signed-off-by: gatorsmile --- .../org/apache/spark/sql/catalyst/SQLBuilder.scala | 7 ++++++- .../test/resources/sqlgen/generate_with_other_1.sql | 2 +- .../test/resources/sqlgen/generate_with_other_2.sql | 2 +- sql/hive/src/test/resources/sqlgen/limit.sql | 4 ++++ .../spark/sql/catalyst/LogicalPlanToSQLSuite.scala | 10 ++++++++++ 5 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 sql/hive/src/test/resources/sqlgen/limit.sql diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala index 6f821f80cc4c5..380454267eaf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/SQLBuilder.scala @@ -138,9 +138,14 @@ class SQLBuilder private ( case g: Generate => generateToSQL(g) - case Limit(limitExpr, child) => + // This prevents a pattern of `((...) AS gen_subquery_0 LIMIT 1)` which does not work. + // For example, `SELECT * FROM (SELECT id FROM tbl TABLESAMPLE (2 ROWS))` makes this plan. + case Limit(limitExpr, child: SubqueryAlias) => s"${toSQL(child)} LIMIT ${limitExpr.sql}" + case Limit(limitExpr, child) => + s"(${toSQL(child)} LIMIT ${limitExpr.sql})" + case Filter(condition, child) => val whereOrHaving = child match { case _: Aggregate => "HAVING" diff --git a/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql b/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql index ab444d0c70936..0739f8fff5467 100644 --- a/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql +++ b/sql/hive/src/test/resources/sqlgen/generate_with_other_1.sql @@ -5,4 +5,4 @@ WHERE id > 2 ORDER BY val, id LIMIT 5 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_0.`gen_attr_2`, gen_subquery_0.`gen_attr_3`, gen_subquery_0.`gen_attr_4`, gen_subquery_0.`gen_attr_1` FROM (SELECT `arr` AS `gen_attr_2`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_4`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 WHERE (`gen_attr_1` > CAST(2 AS BIGINT))) AS gen_subquery_1 LATERAL VIEW explode(`gen_attr_2`) gen_subquery_2 AS `gen_attr_0` ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST LIMIT 5) AS parquet_t3 +SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM ((SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT gen_subquery_0.`gen_attr_2`, gen_subquery_0.`gen_attr_3`, gen_subquery_0.`gen_attr_4`, gen_subquery_0.`gen_attr_1` FROM (SELECT `arr` AS `gen_attr_2`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_4`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 WHERE (`gen_attr_1` > CAST(2 AS BIGINT))) AS gen_subquery_1 LATERAL VIEW explode(`gen_attr_2`) gen_subquery_2 AS `gen_attr_0` ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST LIMIT 5)) AS parquet_t3 diff --git a/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql b/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql index 42a2369f34d1c..c4b344ee238a5 100644 --- a/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql +++ b/sql/hive/src/test/resources/sqlgen/generate_with_other_2.sql @@ -7,4 +7,4 @@ WHERE val > 2 ORDER BY val, id LIMIT 5 -------------------------------------------------------------------------------- -SELECT `gen_attr_0` AS `val`, 
`gen_attr_1` AS `id` FROM (SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `arr` AS `gen_attr_4`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_5`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 LATERAL VIEW explode(`gen_attr_3`) gen_subquery_2 AS `gen_attr_2` LATERAL VIEW explode(`gen_attr_2`) gen_subquery_3 AS `gen_attr_0` WHERE (`gen_attr_0` > CAST(2 AS BIGINT)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST LIMIT 5) AS gen_subquery_1 +SELECT `gen_attr_0` AS `val`, `gen_attr_1` AS `id` FROM ((SELECT `gen_attr_0`, `gen_attr_1` FROM (SELECT `arr` AS `gen_attr_4`, `arr2` AS `gen_attr_3`, `json` AS `gen_attr_5`, `id` AS `gen_attr_1` FROM `default`.`parquet_t3`) AS gen_subquery_0 LATERAL VIEW explode(`gen_attr_3`) gen_subquery_2 AS `gen_attr_2` LATERAL VIEW explode(`gen_attr_2`) gen_subquery_3 AS `gen_attr_0` WHERE (`gen_attr_0` > CAST(2 AS BIGINT)) ORDER BY `gen_attr_0` ASC NULLS FIRST, `gen_attr_1` ASC NULLS FIRST LIMIT 5)) AS gen_subquery_1 diff --git a/sql/hive/src/test/resources/sqlgen/limit.sql b/sql/hive/src/test/resources/sqlgen/limit.sql new file mode 100644 index 0000000000000..7a6b060fbf505 --- /dev/null +++ b/sql/hive/src/test/resources/sqlgen/limit.sql @@ -0,0 +1,4 @@ +-- This file is automatically generated by LogicalPlanToSQLSuite. +SELECT * FROM (SELECT id FROM tbl LIMIT 2) +-------------------------------------------------------------------------------- +SELECT `gen_attr_0` AS `id` FROM (SELECT `gen_attr_0` FROM (SELECT `gen_attr_0` FROM (SELECT `id` AS `gen_attr_0`, `name` AS `gen_attr_1` FROM `default`.`tbl`) AS gen_subquery_0 LIMIT 2)) AS tbl diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala index 8696337b9dc8a..557ea44d1c80b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/catalyst/LogicalPlanToSQLSuite.scala @@ -1173,4 +1173,14 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with SQLTestUtils { ) } } + + test("SPARK-17982 - limit") { + withTable("tbl") { + sql("CREATE TABLE tbl(id INT, name STRING)") + checkSQL( + "SELECT * FROM (SELECT id FROM tbl LIMIT 2)", + "limit" + ) + } + } } From 87820da782fd2d08078227a2ce5c363c3e1cb0f0 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 11 Nov 2016 13:52:10 -0800 Subject: [PATCH 0090/1204] [SPARK-18387][SQL] Add serialization to checkEvaluation. ## What changes were proposed in this pull request? This removes the serialization test from RegexpExpressionsSuite and replaces it by serializing all expressions in checkEvaluation. This also fixes math constant expressions by making LeafMathExpression Serializable and fixes NumberFormat values that are null or invalid after serialization. ## How was this patch tested? This patch is to tests. Author: Ryan Blue Closes #15847 from rdblue/SPARK-18387-fix-serializable-expressions. 
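A small sketch of the serialization round-trip that checkEvaluation now applies to every expression under test (the helper below is illustrative, not the suite's exact code):

```scala
import scala.reflect.ClassTag

import org.apache.spark.SparkConf
import org.apache.spark.serializer.JavaSerializer

// Serialize and deserialize a value with Spark's JavaSerializer; a non-serializable
// expression throws here, failing the test before evaluation is even attempted.
def roundTrip[T: ClassTag](value: T): T = {
  val serializer = new JavaSerializer(new SparkConf()).newInstance()
  serializer.deserialize[T](serializer.serialize(value))
}
```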
(cherry picked from commit 6e95325fc3726d260054bd6e7c0717b3c139917e) Signed-off-by: Reynold Xin --- .../expressions/mathExpressions.scala | 2 +- .../expressions/stringExpressions.scala | 44 +++++++++++-------- .../expressions/ExpressionEvalHelper.scala | 15 ++++--- .../expressions/RegexpExpressionsSuite.scala | 16 +------ 4 files changed, 36 insertions(+), 41 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index a60494a5bb69d..65273a77b1054 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -36,7 +36,7 @@ import org.apache.spark.unsafe.types.UTF8String * @param name The short name of the function */ abstract class LeafMathExpression(c: Double, name: String) - extends LeafExpression with CodegenFallback { + extends LeafExpression with CodegenFallback with Serializable { override def dataType: DataType = DoubleType override def foldable: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 5f533fecf8d07..e74ef9a08750e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1431,18 +1431,20 @@ case class FormatNumber(x: Expression, d: Expression) // Associated with the pattern, for the last d value, and we will update the // pattern (DecimalFormat) once the new coming d value differ with the last one. + // This is an Option to distinguish between 0 (numberFormat is valid) and uninitialized after + // serialization (numberFormat has not been updated for dValue = 0). @transient - private var lastDValue: Int = -100 + private var lastDValue: Option[Int] = None // A cached DecimalFormat, for performance concern, we will change it // only if the d value changed. @transient - private val pattern: StringBuffer = new StringBuffer() + private lazy val pattern: StringBuffer = new StringBuffer() // SPARK-13515: US Locale configures the DecimalFormat object to use a dot ('.') // as a decimal separator. 
@transient - private val numberFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) + private lazy val numberFormat = new DecimalFormat("", new DecimalFormatSymbols(Locale.US)) override protected def nullSafeEval(xObject: Any, dObject: Any): Any = { val dValue = dObject.asInstanceOf[Int] @@ -1450,24 +1452,28 @@ case class FormatNumber(x: Expression, d: Expression) return null } - if (dValue != lastDValue) { - // construct a new DecimalFormat only if a new dValue - pattern.delete(0, pattern.length) - pattern.append("#,###,###,###,###,###,##0") - - // decimal place - if (dValue > 0) { - pattern.append(".") - - var i = 0 - while (i < dValue) { - i += 1 - pattern.append("0") + lastDValue match { + case Some(last) if last == dValue => + // use the current pattern + case _ => + // construct a new DecimalFormat only if a new dValue + pattern.delete(0, pattern.length) + pattern.append("#,###,###,###,###,###,##0") + + // decimal place + if (dValue > 0) { + pattern.append(".") + + var i = 0 + while (i < dValue) { + i += 1 + pattern.append("0") + } } - } - lastDValue = dValue - numberFormat.applyLocalizedPattern(pattern.toString) + lastDValue = Some(dValue) + + numberFormat.applyLocalizedPattern(pattern.toString) } x.dataType match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 9ceb709185417..f83650424a964 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -22,7 +22,8 @@ import org.scalactic.TripleEqualsSupport.Spread import org.scalatest.exceptions.TestFailedException import org.scalatest.prop.GeneratorDrivenPropertyChecks -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.serializer.JavaSerializer import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer @@ -43,13 +44,15 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks { protected def checkEvaluation( expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = { + val serializer = new JavaSerializer(new SparkConf()).newInstance + val expr: Expression = serializer.deserialize(serializer.serialize(expression)) val catalystValue = CatalystTypeConverters.convertToCatalyst(expected) - checkEvaluationWithoutCodegen(expression, catalystValue, inputRow) - checkEvaluationWithGeneratedMutableProjection(expression, catalystValue, inputRow) - if (GenerateUnsafeProjection.canSupport(expression.dataType)) { - checkEvalutionWithUnsafeProjection(expression, catalystValue, inputRow) + checkEvaluationWithoutCodegen(expr, catalystValue, inputRow) + checkEvaluationWithGeneratedMutableProjection(expr, catalystValue, inputRow) + if (GenerateUnsafeProjection.canSupport(expr.dataType)) { + checkEvalutionWithUnsafeProjection(expr, catalystValue, inputRow) } - checkEvaluationWithOptimization(expression, catalystValue, inputRow) + checkEvaluationWithOptimization(expr, catalystValue, inputRow) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index d0d1aaa9d299d..5299549e7b4da 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.types.StringType @@ -192,17 +191,4 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(StringSplit(s1, s2), null, row3) } - test("RegExpReplace serialization") { - val serializer = new JavaSerializer(new SparkConf()).newInstance - - val row = create_row("abc", "b", "") - - val s = 's.string.at(0) - val p = 'p.string.at(1) - val r = 'r.string.at(2) - - val expr: RegExpReplace = serializer.deserialize(serializer.serialize(RegExpReplace(s, p, r))) - checkEvaluation(expr, "ac", row) - } - } From c2ebda443b2678e554d859d866af53e2e94822f2 Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Fri, 11 Nov 2016 15:49:55 -0800 Subject: [PATCH 0091/1204] [SPARK-18264][SPARKR] build vignettes with package, update vignettes for CRAN release build and add info on release ## What changes were proposed in this pull request? Changes to DESCRIPTION to build vignettes. Changes the metadata for vignettes to generate the recommended format (which is about <10% of size before). Unfortunately it does not look as nice (before - left, after - right) ![image](https://cloud.githubusercontent.com/assets/8969467/20040492/b75883e6-a40d-11e6-9534-25cdd5d59a8b.png) ![image](https://cloud.githubusercontent.com/assets/8969467/20040490/a40f4d42-a40d-11e6-8c91-af00ddcbdad9.png) Also add information on how to run build/release to CRAN later. ## How was this patch tested? manually, unit tests shivaram We need this for branch-2.1 Author: Felix Cheung Closes #15790 from felixcheung/rpkgvignettes. (cherry picked from commit ba23f768f7419039df85530b84258ec31f0c22b4) Signed-off-by: Shivaram Venkataraman --- R/CRAN_RELEASE.md | 91 ++++++++++++++++++++++++++++ R/README.md | 8 +-- R/check-cran.sh | 33 ++++++++-- R/create-docs.sh | 19 +----- R/pkg/DESCRIPTION | 9 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 9 +-- 6 files changed, 134 insertions(+), 35 deletions(-) create mode 100644 R/CRAN_RELEASE.md diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md new file mode 100644 index 0000000000000..bea8f9fbe4eec --- /dev/null +++ b/R/CRAN_RELEASE.md @@ -0,0 +1,91 @@ +# SparkR CRAN Release + +To release SparkR as a package to CRAN, we would use the `devtools` package. Please work with the +`dev@spark.apache.org` community and R package maintainer on this. + +### Release + +First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. + +Note that while `check-cran.sh` is running `R CMD check`, it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. + +To upload a release, we would need to update the `cran-comments.md`. 
This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. + +Once everything is in place, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::release(); .libPaths(paths) +``` + +For more information please refer to http://r-pkgs.had.co.nz/release.html#release-check + +### Testing: build package manually + +To build package manually such as to inspect the resulting `.tar.gz` file content, we would also use the `devtools` package. + +Source package is what get released to CRAN. CRAN would then build platform-specific binary packages from the source package. + +#### Build source package + +To build source package locally without releasing to CRAN, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::build("pkg"); .libPaths(paths) +``` + +(http://r-pkgs.had.co.nz/vignettes.html#vignette-workflow-2) + +Similarly, the source package is also created by `check-cran.sh` with `R CMD build pkg`. + +For example, this should be the content of the source package: + +```sh +DESCRIPTION R inst tests +NAMESPACE build man vignettes + +inst/doc/ +sparkr-vignettes.html +sparkr-vignettes.Rmd +sparkr-vignettes.Rman + +build/ +vignette.rds + +man/ + *.Rd files... + +vignettes/ +sparkr-vignettes.Rmd +``` + +#### Test source package + +To install, run this: + +```sh +R CMD INSTALL SparkR_2.1.0.tar.gz +``` + +With "2.1.0" replaced with the version of SparkR. + +This command installs SparkR to the default libPaths. Once that is done, you should be able to start R and run: + +```R +library(SparkR) +vignette("sparkr-vignettes", package="SparkR") +``` + +#### Build binary package + +To build binary package locally, run in R under the `SPARK_HOME/R` directory: + +```R +paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); devtools::build("pkg", binary = TRUE); .libPaths(paths) +``` + +For example, this should be the content of the binary package: + +```sh +DESCRIPTION Meta R html tests +INDEX NAMESPACE help profile worker +``` diff --git a/R/README.md b/R/README.md index 932d5272d0b4f..47f9a86dfde11 100644 --- a/R/README.md +++ b/R/README.md @@ -6,7 +6,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R Libraries of sparkR need to be created in `$SPARK_HOME/R/lib`. This can be done by running the script `$SPARK_HOME/R/install-dev.sh`. By default the above script uses the system wide installation of R. However, this can be changed to any user installed location of R by setting the environment variable `R_HOME` the full path of the base directory where R is installed, before running install-dev.sh script. 
-Example: +Example: ```bash # where /home/username/R is where R is installed and /home/username/R/bin contains the files R and RScript export R_HOME=/home/username/R @@ -46,7 +46,7 @@ Sys.setenv(SPARK_HOME="/Users/username/spark") # This line loads SparkR from the installed directory .libPaths(c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib"), .libPaths())) library(SparkR) -sc <- sparkR.init(master="local") +sparkR.session() ``` #### Making changes to SparkR @@ -54,11 +54,11 @@ sc <- sparkR.init(master="local") The [instructions](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) for making contributions to Spark also apply to SparkR. If you only make R file changes (i.e. no Scala changes) then you can just re-install the R package using `R/install-dev.sh` and test your changes. Once you have made your changes, please include unit tests for them and run existing unit tests using the `R/run-tests.sh` script as described below. - + #### Generating documentation The SparkR documentation (Rd files and HTML files) are not a part of the source repository. To generate them you can run the script `R/create-docs.sh`. This script uses `devtools` and `knitr` to generate the docs and these packages need to be installed on the machine before using the script. Also, you may need to install these [prerequisites](https://github.com/apache/spark/tree/master/docs#prerequisites). See also, `R/DOCUMENTATION.md` - + ### Examples, Unit tests SparkR comes with several sample programs in the `examples/src/main/r` directory. diff --git a/R/check-cran.sh b/R/check-cran.sh index bb331466ae931..c5f042848c90c 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -36,11 +36,27 @@ if [ ! -z "$R_HOME" ] fi echo "USING R_HOME = $R_HOME" -# Build the latest docs +# Build the latest docs, but not vignettes, which is built with the package next $FWDIR/create-docs.sh -# Build a zip file containing the source package -"$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg +# Build source package with vignettes +SPARK_HOME="$(cd "${FWDIR}"/..; pwd)" +. "${SPARK_HOME}"/bin/load-spark-env.sh +if [ -f "${SPARK_HOME}/RELEASE" ]; then + SPARK_JARS_DIR="${SPARK_HOME}/jars" +else + SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" +fi + +if [ -d "$SPARK_JARS_DIR" ]; then + # Build a zip file containing the source package with vignettes + SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg + + find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete +else + echo "Error Spark JARs not found in $SPARK_HOME" + exit 1 +fi # Run check as-cran. 
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'` @@ -54,11 +70,16 @@ fi if [ -n "$NO_MANUAL" ] then - CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual" + CRAN_CHECK_OPTIONS=$CRAN_CHECK_OPTIONS" --no-manual --no-vignettes" fi echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" -"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz - +if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ] +then + "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz +else + # This will run tests and/or build vignettes, and require SPARK_HOME + SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz +fi popd > /dev/null diff --git a/R/create-docs.sh b/R/create-docs.sh index 69ffc5f678c36..84e6aa928cb0f 100755 --- a/R/create-docs.sh +++ b/R/create-docs.sh @@ -20,7 +20,7 @@ # Script to create API docs and vignettes for SparkR # This requires `devtools`, `knitr` and `rmarkdown` to be installed on the machine. -# After running this script the html docs can be found in +# After running this script the html docs can be found in # $SPARK_HOME/R/pkg/html # The vignettes can be found in # $SPARK_HOME/R/pkg/vignettes/sparkr_vignettes.html @@ -52,21 +52,4 @@ Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knit popd -# Find Spark jars. -if [ -f "${SPARK_HOME}/RELEASE" ]; then - SPARK_JARS_DIR="${SPARK_HOME}/jars" -else - SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" -fi - -# Only create vignettes if Spark JARs exist -if [ -d "$SPARK_JARS_DIR" ]; then - # render creates SparkR vignettes - Rscript -e 'library(rmarkdown); paths <- .libPaths(); .libPaths(c("lib", paths)); Sys.setenv(SPARK_HOME=tools::file_path_as_absolute("..")); render("pkg/vignettes/sparkr-vignettes.Rmd"); .libPaths(paths)' - - find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete -else - echo "Skipping R vignettes as Spark JARs not found in $SPARK_HOME" -fi - popd diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 5a83883089e0e..fe41a9e7dabbd 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,8 +1,8 @@ Package: SparkR Type: Package Title: R Frontend for Apache Spark -Version: 2.0.0 -Date: 2016-08-27 +Version: 2.1.0 +Date: 2016-11-06 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shivaram@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", @@ -18,7 +18,9 @@ Depends: Suggests: testthat, e1071, - survival + survival, + knitr, + rmarkdown Description: The SparkR package provides an R frontend for Apache Spark. 
License: Apache License (== 2.0) Collate: @@ -48,3 +50,4 @@ Collate: 'utils.R' 'window.R' RoxygenNote: 5.0.1 +VignetteBuilder: knitr diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 80e876027bddb..73a5e26a3ba9c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -1,12 +1,13 @@ --- title: "SparkR - Practical Guide" output: - html_document: - theme: united + rmarkdown::html_vignette: toc: true toc_depth: 4 - toc_float: true - highlight: textmate +vignette: > + %\VignetteIndexEntry{SparkR - Practical Guide} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} --- ## Overview From 56859c029476bc41b2d2e05043c119146b287bce Mon Sep 17 00:00:00 2001 From: sethah Date: Sat, 12 Nov 2016 01:38:26 +0000 Subject: [PATCH 0092/1204] [SPARK-18060][ML] Avoid unnecessary computation for MLOR ## What changes were proposed in this pull request? Before this patch, the gradient updates for multinomial logistic regression were computed by an outer loop over the number of classes and an inner loop over the number of features. Inside the inner loop, we standardized the feature value (`value / featuresStd(index)`), which means we performed the computation `numFeatures * numClasses` times. We only need to perform that computation `numFeatures` times, however. If we re-order the inner and outer loop, we can avoid this, but then we lose sequential memory access. In this patch, we instead lay out the coefficients in column major order while we train, so that we can avoid the extra computation and retain sequential memory access. We convert back to row-major order when we create the model. ## How was this patch tested? This is an implementation detail only, so the original behavior should be maintained. All tests pass. I ran some performance tests to verify speedups. The results are below, and show significant speedups. ## Performance Tests **Setup** 3 node bare-metal cluster 120 cores total 384 gb RAM total **Results** NOTE: The `currentMasterTime` and `thisPatchTime` are times in seconds for a single iteration of L-BFGS or OWL-QN. | | numPoints | numFeatures | numClasses | regParam | elasticNetParam | currentMasterTime (sec) | thisPatchTime (sec) | pctSpeedup | |----|-------------|---------------|--------------|------------|-------------------|---------------------------|-----------------------|--------------| | 0 | 1e+07 | 100 | 500 | 0.5 | 0 | 90 | 18 | 80 | | 1 | 1e+08 | 100 | 50 | 0.5 | 0 | 90 | 19 | 78 | | 2 | 1e+08 | 100 | 50 | 0.05 | 1 | 72 | 19 | 73 | | 3 | 1e+06 | 100 | 5000 | 0.5 | 0 | 93 | 53 | 43 | | 4 | 1e+07 | 100 | 5000 | 0.5 | 0 | 900 | 390 | 56 | | 5 | 1e+08 | 100 | 500 | 0.5 | 0 | 840 | 174 | 79 | | 6 | 1e+08 | 100 | 200 | 0.5 | 0 | 360 | 72 | 80 | | 7 | 1e+08 | 1000 | 5 | 0.5 | 0 | 9 | 3 | 66 | Author: sethah Closes #15593 from sethah/MLOR_PERF_COL_MAJOR_COEF. 
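As a hedged aside (not part of the patch itself): the column-major layout described above can be made concrete with a small standalone Scala sketch. The object and helper names below are illustrative only; the index formulas mirror the `col * numCoefficientSets + row` and `numFeatures * numCoefficientSets + i` expressions introduced by this change, reproducing the `beta_11, beta_21, beta_31, beta_12, beta_22, beta_32, intercept_1..3` ordering for `numClasses = 3`, `numFeatures = 2`, `fitIntercept = true`.

```scala
// Illustrative sketch only; not Spark source code.
object ColumnMajorLayoutSketch {
  def main(args: Array[String]): Unit = {
    val numClasses = 3
    val numFeatures = 2
    val numCoefficientSets = numClasses // multinomial case

    // Flat position of beta_jk (class j, feature k) in the column-major
    // layout used during training.
    def colMajorIndex(j: Int, k: Int): Int = k * numCoefficientSets + j

    // Intercepts are appended after all feature coefficients.
    def interceptIndex(j: Int): Int = numFeatures * numCoefficientSets + j

    for (k <- 0 until numFeatures; j <- 0 until numClasses) {
      println(s"beta_${j + 1}${k + 1} -> index ${colMajorIndex(j, k)}")
    }
    for (j <- 0 until numClasses) {
      println(s"intercept_${j + 1} -> index ${interceptIndex(j)}")
    }
  }
}
```

Because consecutive flat indices for a fixed feature belong to different classes, the inner while-loop over classes in `LogisticAggregator` walks memory sequentially while each feature value is standardized only once.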
(cherry picked from commit 46b2550bcd3690a260b995fd4d024a73b92a0299) Signed-off-by: DB Tsai --- .../classification/LogisticRegression.scala | 125 +++++++++++------- 1 file changed, 74 insertions(+), 51 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c4651054fd765..18b9b3043db8a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -438,18 +438,14 @@ class LogisticRegression @Since("1.2.0") ( val standardizationParam = $(standardization) def regParamL1Fun = (index: Int) => { // Remove the L1 penalization on the intercept - val isIntercept = $(fitIntercept) && ((index + 1) % numFeaturesPlusIntercept == 0) + val isIntercept = $(fitIntercept) && index >= numFeatures * numCoefficientSets if (isIntercept) { 0.0 } else { if (standardizationParam) { regParamL1 } else { - val featureIndex = if ($(fitIntercept)) { - index % numFeaturesPlusIntercept - } else { - index % numFeatures - } + val featureIndex = index / numCoefficientSets // If `standardization` is false, we still standardize the data // to improve the rate of convergence; as a result, we have to // perform this reverse standardization by penalizing each component @@ -466,6 +462,15 @@ class LogisticRegression @Since("1.2.0") ( new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) } + /* + The coefficients are laid out in column major order during training. e.g. for + `numClasses = 3` and `numFeatures = 2` and `fitIntercept = true` the layout is: + + Array(beta_11, beta_21, beta_31, beta_12, beta_22, beta_32, intercept_1, intercept_2, + intercept_3) + + where beta_jk corresponds to the coefficient for class `j` and feature `k`. + */ val initialCoefficientsWithIntercept = Vectors.zeros(numCoefficientSets * numFeaturesPlusIntercept) @@ -489,13 +494,14 @@ class LogisticRegression @Since("1.2.0") ( val initialCoefWithInterceptArray = initialCoefficientsWithIntercept.toArray val providedCoef = optInitialModel.get.coefficientMatrix providedCoef.foreachActive { (row, col, value) => - val flatIndex = row * numFeaturesPlusIntercept + col + // convert matrix to column major for training + val flatIndex = col * numCoefficientSets + row // We need to scale the coefficients since they will be trained in the scaled space initialCoefWithInterceptArray(flatIndex) = value * featuresStd(col) } if ($(fitIntercept)) { optInitialModel.get.interceptVector.foreachActive { (index, value) => - val coefIndex = (index + 1) * numFeaturesPlusIntercept - 1 + val coefIndex = numCoefficientSets * numFeatures + index initialCoefWithInterceptArray(coefIndex) = value } } @@ -526,7 +532,7 @@ class LogisticRegression @Since("1.2.0") ( val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing val rawMean = rawIntercepts.sum / rawIntercepts.length rawIntercepts.indices.foreach { i => - initialCoefficientsWithIntercept.toArray(i * numFeaturesPlusIntercept + numFeatures) = + initialCoefficientsWithIntercept.toArray(numClasses * numFeatures + i) = rawIntercepts(i) - rawMean } } else if ($(fitIntercept)) { @@ -572,16 +578,20 @@ class LogisticRegression @Since("1.2.0") ( /* The coefficients are trained in the scaled space; we're converting them back to the original space. 
+ + Additionally, since the coefficients were laid out in column major order during training + to avoid extra computation, we convert them back to row major before passing them to the + model. + Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ val rawCoefficients = state.x.toArray.clone() val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i => - // flatIndex will loop though rawCoefficients, and skip the intercept terms. - val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i + val colMajorIndex = (i % numFeatures) * numCoefficientSets + i / numFeatures val featureIndex = i % numFeatures if (featuresStd(featureIndex) != 0.0) { - rawCoefficients(flatIndex) / featuresStd(featureIndex) + rawCoefficients(colMajorIndex) / featuresStd(featureIndex) } else { 0.0 } @@ -618,7 +628,7 @@ class LogisticRegression @Since("1.2.0") ( val interceptsArray: Array[Double] = if ($(fitIntercept)) { Array.tabulate(numCoefficientSets) { i => - val coefIndex = (i + 1) * numFeaturesPlusIntercept - 1 + val coefIndex = numFeatures * numCoefficientSets + i rawCoefficients(coefIndex) } } else { @@ -697,6 +707,7 @@ class LogisticRegressionModel private[spark] ( /** * A vector of model coefficients for "binomial" logistic regression. If this model was trained * using the "multinomial" family then an exception is thrown. + * * @return Vector */ @Since("2.0.0") @@ -720,6 +731,7 @@ class LogisticRegressionModel private[spark] ( /** * The model intercept for "binomial" logistic regression. If this model was fit with the * "multinomial" family then an exception is thrown. + * * @return Double */ @Since("1.3.0") @@ -1389,6 +1401,12 @@ class BinaryLogisticRegressionSummary private[classification] ( * $$ *
* + * @note In order to avoid unnecessary computation during calculation of the gradient updates + * we lay out the coefficients in column major order during training. This allows us to + * perform feature standardization once, while still retaining sequential memory access + * for speed. We convert back to row major order when we create the model, + * since this form is optimal for the matrix operations used for prediction. + * * @param bcCoefficients The broadcast coefficients corresponding to the features. * @param bcFeaturesStd The broadcast standard deviation values of the features. * @param numClasses the number of possible outcomes for k classes classification problem in @@ -1486,23 +1504,25 @@ private class LogisticAggregator( var marginOfLabel = 0.0 var maxMargin = Double.NegativeInfinity - val margins = Array.tabulate(numClasses) { i => - var margin = 0.0 - features.foreachActive { (index, value) => - if (localFeaturesStd(index) != 0.0 && value != 0.0) { - margin += localCoefficients(i * numFeaturesPlusIntercept + index) * - value / localFeaturesStd(index) - } + val margins = new Array[Double](numClasses) + features.foreachActive { (index, value) => + val stdValue = value / localFeaturesStd(index) + var j = 0 + while (j < numClasses) { + margins(j) += localCoefficients(index * numClasses + j) * stdValue + j += 1 } - + } + var i = 0 + while (i < numClasses) { if (fitIntercept) { - margin += localCoefficients(i * numFeaturesPlusIntercept + numFeatures) + margins(i) += localCoefficients(numClasses * numFeatures + i) } - if (i == label.toInt) marginOfLabel = margin - if (margin > maxMargin) { - maxMargin = margin + if (i == label.toInt) marginOfLabel = margins(i) + if (margins(i) > maxMargin) { + maxMargin = margins(i) } - margin + i += 1 } /** @@ -1510,33 +1530,39 @@ private class LogisticAggregator( * We address this by subtracting maxMargin from all the margins, so it's guaranteed * that all of the new margins will be smaller than zero to prevent arithmetic overflow. 
*/ + val multipliers = new Array[Double](numClasses) val sum = { var temp = 0.0 - if (maxMargin > 0) { - for (i <- 0 until numClasses) { - margins(i) -= maxMargin - temp += math.exp(margins(i)) - } - } else { - for (i <- 0 until numClasses) { - temp += math.exp(margins(i)) - } + var i = 0 + while (i < numClasses) { + if (maxMargin > 0) margins(i) -= maxMargin + val exp = math.exp(margins(i)) + temp += exp + multipliers(i) = exp + i += 1 } temp } - for (i <- 0 until numClasses) { - val multiplier = math.exp(margins(i)) / sum - { - if (label == i) 1.0 else 0.0 - } - features.foreachActive { (index, value) => - if (localFeaturesStd(index) != 0.0 && value != 0.0) { - localGradientArray(i * numFeaturesPlusIntercept + index) += - weight * multiplier * value / localFeaturesStd(index) + margins.indices.foreach { i => + multipliers(i) = multipliers(i) / sum - (if (label == i) 1.0 else 0.0) + } + features.foreachActive { (index, value) => + if (localFeaturesStd(index) != 0.0 && value != 0.0) { + val stdValue = value / localFeaturesStd(index) + var j = 0 + while (j < numClasses) { + localGradientArray(index * numClasses + j) += + weight * multipliers(j) * stdValue + j += 1 } } - if (fitIntercept) { - localGradientArray(i * numFeaturesPlusIntercept + numFeatures) += weight * multiplier + } + if (fitIntercept) { + var i = 0 + while (i < numClasses) { + localGradientArray(numFeatures * numClasses + i) += weight * multipliers(i) + i += 1 } } @@ -1637,6 +1663,7 @@ private class LogisticCostFun( val bcCoeffs = instances.context.broadcast(coeffs) val featuresStd = bcFeaturesStd.value val numFeatures = featuresStd.length + val numCoefficientSets = if (multinomial) numClasses else 1 val logisticAggregator = { val seqOp = (c: LogisticAggregator, instance: Instance) => c.add(instance) @@ -1656,7 +1683,7 @@ private class LogisticCostFun( var sum = 0.0 coeffs.foreachActive { case (index, value) => // We do not apply regularization to the intercepts - val isIntercept = fitIntercept && ((index + 1) % (numFeatures + 1) == 0) + val isIntercept = fitIntercept && index >= numCoefficientSets * numFeatures if (!isIntercept) { // The following code will compute the loss of the regularization; also // the gradient of the regularization, and add back to totalGradientArray. @@ -1665,11 +1692,7 @@ private class LogisticCostFun( totalGradientArray(index) += regParamL2 * value value * value } else { - val featureIndex = if (fitIntercept) { - index % (numFeatures + 1) - } else { - index % numFeatures - } + val featureIndex = index / numCoefficientSets if (featuresStd(featureIndex) != 0.0) { // If `standardization` is false, we still standardize the data // to improve the rate of convergence; as a result, we have to From 893355143a177f1fea1d2fb6f6e617574e5c5e52 Mon Sep 17 00:00:00 2001 From: Guoqiang Li Date: Sat, 12 Nov 2016 09:49:14 +0000 Subject: [PATCH 0093/1204] [SPARK-18375][SPARK-18383][BUILD][CORE] Upgrade netty to 4.0.42.Final ## What changes were proposed in this pull request? One of the important changes for 4.0.42.Final is "Support any FileRegion implementation when using epoll transport netty/netty#5825". In 4.0.42.Final, `MessageWithHeader` can work properly when `spark.[shuffle|rpc].io.mode` is set to epoll ## How was this patch tested? Existing tests Author: Guoqiang Li Closes #15830 from witgo/SPARK-18375_netty-4.0.42. 
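As a hedged illustration only (not part of this patch), the epoll code path mentioned above is selected through the `spark.shuffle.io.mode` and `spark.rpc.io.mode` properties cited in the message. The application name and the trivial job below are placeholders, the master is assumed to come from `spark-submit`, the epoll transport is only available on Linux with netty's native library, and the exact accepted value casing may vary by version.

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Sketch: opt the shuffle and RPC transports into netty's epoll implementation.
object EpollTransportSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("epoll-transport-sketch") // placeholder name
      .set("spark.shuffle.io.mode", "EPOLL")
      .set("spark.rpc.io.mode", "EPOLL")
    // Master is expected to be supplied by spark-submit.
    val spark = SparkSession.builder.config(conf).getOrCreate()
    try {
      // A small shuffle job exercises both the RPC and shuffle transports.
      println(spark.range(0, 1000).repartition(10).count())
    } finally {
      spark.stop()
    }
  }
}
```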
(cherry picked from commit bc41d997ea287080f549219722b6d9049adef4e2) Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 ++++ dev/deps/spark-deps-hadoop-2.2 | 2 +- dev/deps/spark-deps-hadoop-2.3 | 2 +- dev/deps/spark-deps-hadoop-2.4 | 2 +- dev/deps/spark-deps-hadoop-2.6 | 2 +- dev/deps/spark-deps-hadoop-2.7 | 2 +- pom.xml | 2 +- 7 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 1de66af632a8a..892e112e18f85 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -39,6 +39,7 @@ import scala.reflect.ClassTag import scala.util.Try import scala.util.control.{ControlThrowable, NonFatal} +import _root_.io.netty.channel.unix.Errors.NativeIoException import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} import com.google.common.io.{ByteStreams, Files => GFiles} import com.google.common.net.InetAddresses @@ -2222,6 +2223,9 @@ private[spark] object Utils extends Logging { isBindCollision(e.getCause) case e: MultiException => e.getThrowables.asScala.exists(isBindCollision) + case e: NativeIoException => + (e.getMessage != null && e.getMessage.startsWith("bind() failed: ")) || + isBindCollision(e.getCause) case e: Exception => isBindCollision(e.getCause) case _ => false } diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 index 6e749ac16cac0..bbdea069f9496 100644 --- a/dev/deps/spark-deps-hadoop-2.2 +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -123,7 +123,7 @@ metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar minlog-1.3.0.jar netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar +netty-all-4.0.42.Final.jar objenesis-2.1.jar opencsv-2.3.jar oro-2.0.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 index 515995a0a46bd..a2dec41d64519 100644 --- a/dev/deps/spark-deps-hadoop-2.3 +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -130,7 +130,7 @@ metrics-jvm-3.1.2.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar +netty-all-4.0.42.Final.jar objenesis-2.1.jar opencsv-2.3.jar oro-2.0.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 index d2139fd952406..c1f02b93d751c 100644 --- a/dev/deps/spark-deps-hadoop-2.4 +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -130,7 +130,7 @@ metrics-jvm-3.1.2.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar +netty-all-4.0.42.Final.jar objenesis-2.1.jar opencsv-2.3.jar oro-2.0.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index b5cecf72ec35f..4f04636be712b 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -138,7 +138,7 @@ metrics-jvm-3.1.2.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar +netty-all-4.0.42.Final.jar objenesis-2.1.jar opencsv-2.3.jar oro-2.0.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index a5e03a78e7ea8..da3af9ffa155b 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -139,7 +139,7 @@ metrics-jvm-3.1.2.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.8.0.Final.jar -netty-all-4.0.41.Final.jar +netty-all-4.0.42.Final.jar objenesis-2.1.jar opencsv-2.3.jar oro-2.0.8.jar diff --git a/pom.xml b/pom.xml index 8aa0a6c3caab9..650b4cd965b66 100644 --- a/pom.xml +++ b/pom.xml @@ -552,7 +552,7 @@ io.netty netty-all - 
4.0.41.Final + 4.0.42.Final io.netty From b2ba83d10ac06614c0126f4b0d913f6979051682 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Sat, 12 Nov 2016 06:13:22 -0800 Subject: [PATCH 0094/1204] [SPARK-14077][ML][FOLLOW-UP] Minor refactor and cleanup for NaiveBayes ## What changes were proposed in this pull request? * Refactor out ```trainWithLabelCheck``` and make ```mllib.NaiveBayes``` call into it. * Avoid capturing the outer object for ```modelType```. * Move ```requireNonnegativeValues``` and ```requireZeroOneBernoulliValues``` to companion object. ## How was this patch tested? Existing tests. Author: Yanbo Liang Closes #15826 from yanboliang/spark-14077-2. (cherry picked from commit 22cb3a060a440205281b71686637679645454ca6) Signed-off-by: Yanbo Liang --- .../spark/ml/classification/NaiveBayes.scala | 72 +++++++++---------- .../mllib/classification/NaiveBayes.scala | 6 +- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index b03a07a6bc1e7..f1a7676c74b0e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -76,7 +76,7 @@ class NaiveBayes @Since("1.5.0") ( extends ProbabilisticClassifier[Vector, NaiveBayes, NaiveBayesModel] with NaiveBayesParams with DefaultParamsWritable { - import NaiveBayes.{Bernoulli, Multinomial} + import NaiveBayes._ @Since("1.5.0") def this() = this(Identifiable.randomUID("nb")) @@ -110,21 +110,20 @@ class NaiveBayes @Since("1.5.0") ( @Since("2.1.0") def setWeightCol(value: String): this.type = set(weightCol, value) + override protected def train(dataset: Dataset[_]): NaiveBayesModel = { + trainWithLabelCheck(dataset, positiveLabel = true) + } + /** * ml assumes input labels in range [0, numClasses). But this implementation * is also called by mllib NaiveBayes which allows other kinds of input labels - * such as {-1, +1}. Here we use this parameter to switch between different processing logic. - * It should be removed when we remove mllib NaiveBayes. + * such as {-1, +1}. `positiveLabel` is used to determine whether the label + * should be checked and it should be removed when we remove mllib NaiveBayes. 
*/ - private[spark] var isML: Boolean = true - - private[spark] def setIsML(isML: Boolean): this.type = { - this.isML = isML - this - } - - override protected def train(dataset: Dataset[_]): NaiveBayesModel = { - if (isML) { + private[spark] def trainWithLabelCheck( + dataset: Dataset[_], + positiveLabel: Boolean): NaiveBayesModel = { + if (positiveLabel) { val numClasses = getNumClasses(dataset) if (isDefined(thresholds)) { require($(thresholds).length == numClasses, this.getClass.getSimpleName + @@ -133,28 +132,9 @@ class NaiveBayes @Since("1.5.0") ( } } - val requireNonnegativeValues: Vector => Unit = (v: Vector) => { - val values = v match { - case sv: SparseVector => sv.values - case dv: DenseVector => dv.values - } - - require(values.forall(_ >= 0.0), - s"Naive Bayes requires nonnegative feature values but found $v.") - } - - val requireZeroOneBernoulliValues: Vector => Unit = (v: Vector) => { - val values = v match { - case sv: SparseVector => sv.values - case dv: DenseVector => dv.values - } - - require(values.forall(v => v == 0.0 || v == 1.0), - s"Bernoulli naive Bayes requires 0 or 1 feature values but found $v.") - } - + val modelTypeValue = $(modelType) val requireValues: Vector => Unit = { - $(modelType) match { + modelTypeValue match { case Multinomial => requireNonnegativeValues case Bernoulli => @@ -226,13 +206,33 @@ class NaiveBayes @Since("1.5.0") ( @Since("1.6.0") object NaiveBayes extends DefaultParamsReadable[NaiveBayes] { /** String name for multinomial model type. */ - private[spark] val Multinomial: String = "multinomial" + private[classification] val Multinomial: String = "multinomial" /** String name for Bernoulli model type. */ - private[spark] val Bernoulli: String = "bernoulli" + private[classification] val Bernoulli: String = "bernoulli" /* Set of modelTypes that NaiveBayes supports */ - private[spark] val supportedModelTypes = Set(Multinomial, Bernoulli) + private[classification] val supportedModelTypes = Set(Multinomial, Bernoulli) + + private[NaiveBayes] def requireNonnegativeValues(v: Vector): Unit = { + val values = v match { + case sv: SparseVector => sv.values + case dv: DenseVector => dv.values + } + + require(values.forall(_ >= 0.0), + s"Naive Bayes requires nonnegative feature values but found $v.") + } + + private[NaiveBayes] def requireZeroOneBernoulliValues(v: Vector): Unit = { + val values = v match { + case sv: SparseVector => sv.values + case dv: DenseVector => dv.values + } + + require(values.forall(v => v == 0.0 || v == 1.0), + s"Bernoulli naive Bayes requires 0 or 1 feature values but found $v.") + } @Since("1.6.0") override def load(path: String): NaiveBayes = super.load(path) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 33561be4b5bc1..767d056861a8b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -364,12 +364,12 @@ class NaiveBayes private ( val nb = new NewNaiveBayes() .setModelType(modelType) .setSmoothing(lambda) - .setIsML(false) val dataset = data.map { case LabeledPoint(label, features) => (label, features.asML) } .toDF("label", "features") - val newModel = nb.fit(dataset) + // mllib NaiveBayes allows input labels like {-1, +1}, so set `positiveLabel` as false. 
+ val newModel = nb.trainWithLabelCheck(dataset, positiveLabel = false) val pi = newModel.pi.toArray val theta = Array.fill[Double](newModel.numClasses, newModel.numFeatures)(0.0) @@ -378,7 +378,7 @@ class NaiveBayes private ( theta(i)(j) = v } - require(newModel.oldLabels != null, + assert(newModel.oldLabels != null, "The underlying ML NaiveBayes training does not produce labels.") new NaiveBayesModel(newModel.oldLabels, pi, theta, modelType) } From 6fae4241f281638d52071102c7f0ee6c2c73a8c7 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 12 Nov 2016 14:50:37 -0800 Subject: [PATCH 0095/1204] [SPARK-18418] Fix flags for make_binary_release for hadoop profile ## What changes were proposed in this pull request? Fix the flags used to specify the hadoop version ## How was this patch tested? Manually tested as part of https://github.com/apache/spark/pull/15659 by having the build succeed. cc joshrosen Author: Holden Karau Closes #15860 from holdenk/minor-fix-release-build-script. (cherry picked from commit 1386fd28daf798bf152606f4da30a36223d75d18) Signed-off-by: Josh Rosen --- dev/create-release/release-build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 96f9b5714ebb8..81f0d63054e29 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -187,10 +187,10 @@ if [[ "$1" == "package" ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. FLAGS="-Psparkr -Phive -Phive-thriftserver -Pyarn -Pmesos" - make_binary_release "hadoop2.3" "-Phadoop2.3 $FLAGS" "3033" & - make_binary_release "hadoop2.4" "-Phadoop2.4 $FLAGS" "3034" & - make_binary_release "hadoop2.6" "-Phadoop2.6 $FLAGS" "3035" & - make_binary_release "hadoop2.7" "-Phadoop2.7 $FLAGS" "3036" & + make_binary_release "hadoop2.3" "-Phadoop-2.3 $FLAGS" "3033" & + make_binary_release "hadoop2.4" "-Phadoop-2.4 $FLAGS" "3034" & + make_binary_release "hadoop2.6" "-Phadoop-2.6 $FLAGS" "3035" & + make_binary_release "hadoop2.7" "-Phadoop-2.7 $FLAGS" "3036" & make_binary_release "hadoop2.4-without-hive" "-Psparkr -Phadoop-2.4 -Pyarn -Pmesos" "3037" & make_binary_release "without-hadoop" "-Psparkr -Phadoop-provided -Pyarn -Pmesos" "3038" & wait From 0c69224ed752c25be1545cfe8ba0db8487a70bf2 Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Sun, 13 Nov 2016 18:10:06 -0800 Subject: [PATCH 0096/1204] [SPARK-18426][STRUCTURED STREAMING] Python Documentation Fix for Structured Streaming Programming Guide ## What changes were proposed in this pull request? Update the python section of the Structured Streaming Guide from .builder() to .builder ## How was this patch tested? Validated documentation and successfully running the test example. Please review https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark before opening a pull request. 'Builder' object is not callable object hence changed .builder() to .builder Author: Denny Lee Closes #15872 from dennyglee/master. 
(cherry picked from commit b91a51bb231af321860415075a7f404bc46e0a74) Signed-off-by: Reynold Xin --- docs/structured-streaming-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index d838ed35a14fd..d2545584ae3b0 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -58,7 +58,7 @@ from pyspark.sql.functions import explode from pyspark.sql.functions import split spark = SparkSession \ - .builder() \ + .builder \ .appName("StructuredNetworkWordCount") \ .getOrCreate() {% endhighlight %} From 8fc6455c0b77f81be79908bb65e6264bf61c90e7 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Sun, 13 Nov 2016 20:25:12 -0800 Subject: [PATCH 0097/1204] [SPARK-18412][SPARKR][ML] Fix exception for some SparkR ML algorithms training on libsvm data ## What changes were proposed in this pull request? * Fix the following exceptions which throws when ```spark.randomForest```(classification), ```spark.gbt```(classification), ```spark.naiveBayes``` and ```spark.glm```(binomial family) were fitted on libsvm data. ``` java.lang.IllegalArgumentException: requirement failed: If label column already exists, forceIndexLabel can not be set with true. ``` See [SPARK-18412](https://issues.apache.org/jira/browse/SPARK-18412) for more detail about how to reproduce this bug. * Refactor out ```getFeaturesAndLabels``` to RWrapperUtils, since lots of ML algorithm wrappers use this function. * Drop some unwanted columns when making prediction. ## How was this patch tested? Add unit test. Author: Yanbo Liang Closes #15851 from yanboliang/spark-18412. (cherry picked from commit 07be232ea12dfc8dc3701ca948814be7dbebf4ee) Signed-off-by: Yanbo Liang --- R/pkg/inst/tests/testthat/test_mllib.R | 18 ++++++++-- .../spark/ml/r/GBTClassificationWrapper.scala | 18 ++++------ .../GeneralizedLinearRegressionWrapper.scala | 5 ++- .../apache/spark/ml/r/NaiveBayesWrapper.scala | 14 +++----- .../org/apache/spark/ml/r/RWrapperUtils.scala | 36 ++++++++++++++++--- .../r/RandomForestClassificationWrapper.scala | 18 ++++------ 6 files changed, 68 insertions(+), 41 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 33e85b78de4fe..4831ce27bec8a 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -881,7 +881,8 @@ test_that("spark.kstest", { expect_match(capture.output(stats)[1], "Kolmogorov-Smirnov test summary:") }) -test_that("spark.randomForest Regression", { +test_that("spark.randomForest", { + # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.randomForest(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, numTrees = 1) @@ -923,9 +924,8 @@ test_that("spark.randomForest Regression", { expect_equal(stats$treeWeights, stats2$treeWeights) unlink(modelPath) -}) -test_that("spark.randomForest Classification", { + # classification data <- suppressWarnings(createDataFrame(iris)) model <- spark.randomForest(data, Species ~ Petal_Length + Petal_Width, "classification", maxDepth = 5, maxBins = 16) @@ -971,6 +971,12 @@ test_that("spark.randomForest Classification", { predictions <- collect(predict(model, data))$prediction expect_equal(length(grep("1.0", predictions)), 50) expect_equal(length(grep("2.0", predictions)), 50) + + # spark.randomForest classification can work on libsvm data + data <- 
read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), + source = "libsvm") + model <- spark.randomForest(data, label ~ features, "classification") + expect_equal(summary(model)$numFeatures, 4) }) test_that("spark.gbt", { @@ -1039,6 +1045,12 @@ test_that("spark.gbt", { expect_equal(iris2$NumericSpecies, as.double(collect(predict(m, df))$prediction)) expect_equal(s$numFeatures, 5) expect_equal(s$numTrees, 20) + + # spark.gbt classification can work on libsvm data + data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"), + source = "libsvm") + model <- spark.gbt(data, label ~ features, "classification") + expect_equal(summary(model)$numFeatures, 692) }) sparkR.session.stop() diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/GBTClassificationWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/GBTClassificationWrapper.scala index 8946025032200..aacb41ee2659b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/GBTClassificationWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/GBTClassificationWrapper.scala @@ -23,10 +23,10 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.ml.{Pipeline, PipelineModel} -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier} import org.apache.spark.ml.feature.{IndexToString, RFormula} import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.r.RWrapperUtils._ import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset} @@ -51,6 +51,7 @@ private[r] class GBTClassifierWrapper private ( pipeline.transform(dataset) .drop(PREDICTED_LABEL_INDEX_COL) .drop(gbtcModel.getFeaturesCol) + .drop(gbtcModel.getLabelCol) } override def write: MLWriter = new @@ -81,19 +82,11 @@ private[r] object GBTClassifierWrapper extends MLReadable[GBTClassifierWrapper] val rFormula = new RFormula() .setFormula(formula) .setForceIndexLabel(true) - RWrapperUtils.checkDataColumns(rFormula, data) + checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) - // get feature names from output schema - val schema = rFormulaModel.transform(data).schema - val featureAttrs = AttributeGroup.fromStructField(schema(rFormulaModel.getFeaturesCol)) - .attributes.get - val features = featureAttrs.map(_.name.get) - - // get label names from output schema - val labelAttr = Attribute.fromStructField(schema(rFormulaModel.getLabelCol)) - .asInstanceOf[NominalAttribute] - val labels = labelAttr.values.get + // get labels and feature names from output schema + val (features, labels) = getFeaturesAndLabels(rFormulaModel, data) // assemble and fit the pipeline val rfc = new GBTClassifier() @@ -109,6 +102,7 @@ private[r] object GBTClassifierWrapper extends MLReadable[GBTClassifierWrapper] .setMaxMemoryInMB(maxMemoryInMB) .setCacheNodeIds(cacheNodeIds) .setFeaturesCol(rFormula.getFeaturesCol) + .setLabelCol(rFormula.getLabelCol) .setPredictionCol(PREDICTED_LABEL_INDEX_COL) if (seed != null && seed.length > 0) rfc.setSeed(seed.toLong) diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala index 995b1ef03bcec..add4d49110d16 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala @@ 
-29,6 +29,7 @@ import org.apache.spark.ml.regression._ import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.r.RWrapperUtils._ import org.apache.spark.ml.util._ import org.apache.spark.sql._ import org.apache.spark.sql.functions._ @@ -64,6 +65,7 @@ private[r] class GeneralizedLinearRegressionWrapper private ( .drop(PREDICTED_LABEL_PROB_COL) .drop(PREDICTED_LABEL_INDEX_COL) .drop(glm.getFeaturesCol) + .drop(glm.getLabelCol) } else { pipeline.transform(dataset) .drop(glm.getFeaturesCol) @@ -92,7 +94,7 @@ private[r] object GeneralizedLinearRegressionWrapper regParam: Double): GeneralizedLinearRegressionWrapper = { val rFormula = new RFormula().setFormula(formula) if (family == "binomial") rFormula.setForceIndexLabel(true) - RWrapperUtils.checkDataColumns(rFormula, data) + checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) // get labels and feature names from output schema val schema = rFormulaModel.transform(data).schema @@ -109,6 +111,7 @@ private[r] object GeneralizedLinearRegressionWrapper .setWeightCol(weightCol) .setRegParam(regParam) .setFeaturesCol(rFormula.getFeaturesCol) + .setLabelCol(rFormula.getLabelCol) val pipeline = if (family == "binomial") { // Convert prediction from probability to label index. val probToPred = new ProbabilityToPrediction() diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala index 4fdab2dd94655..0afea4be3d1dd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/NaiveBayesWrapper.scala @@ -23,9 +23,9 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.ml.{Pipeline, PipelineModel} -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.classification.{NaiveBayes, NaiveBayesModel} import org.apache.spark.ml.feature.{IndexToString, RFormula} +import org.apache.spark.ml.r.RWrapperUtils._ import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset} @@ -46,6 +46,7 @@ private[r] class NaiveBayesWrapper private ( pipeline.transform(dataset) .drop(PREDICTED_LABEL_INDEX_COL) .drop(naiveBayesModel.getFeaturesCol) + .drop(naiveBayesModel.getLabelCol) } override def write: MLWriter = new NaiveBayesWrapper.NaiveBayesWrapperWriter(this) @@ -60,21 +61,16 @@ private[r] object NaiveBayesWrapper extends MLReadable[NaiveBayesWrapper] { val rFormula = new RFormula() .setFormula(formula) .setForceIndexLabel(true) - RWrapperUtils.checkDataColumns(rFormula, data) + checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) // get labels and feature names from output schema - val schema = rFormulaModel.transform(data).schema - val labelAttr = Attribute.fromStructField(schema(rFormulaModel.getLabelCol)) - .asInstanceOf[NominalAttribute] - val labels = labelAttr.values.get - val featureAttrs = AttributeGroup.fromStructField(schema(rFormulaModel.getFeaturesCol)) - .attributes.get - val features = featureAttrs.map(_.name.get) + val (features, labels) = getFeaturesAndLabels(rFormulaModel, data) // assemble and fit the pipeline val naiveBayes = new NaiveBayes() .setSmoothing(smoothing) .setModelType("bernoulli") .setFeaturesCol(rFormula.getFeaturesCol) + .setLabelCol(rFormula.getLabelCol) .setPredictionCol(PREDICTED_LABEL_INDEX_COL) val idxToStr = new IndexToString() 
.setInputCol(PREDICTED_LABEL_INDEX_COL) diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala index 379007c4d948d..665e50af67d46 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala @@ -18,11 +18,12 @@ package org.apache.spark.ml.r import org.apache.spark.internal.Logging -import org.apache.spark.ml.feature.RFormula +import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} +import org.apache.spark.ml.feature.{RFormula, RFormulaModel} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.Dataset -object RWrapperUtils extends Logging { +private[r] object RWrapperUtils extends Logging { /** * DataFrame column check. @@ -32,14 +33,41 @@ object RWrapperUtils extends Logging { * * @param rFormula RFormula instance * @param data Input dataset - * @return Unit */ def checkDataColumns(rFormula: RFormula, data: Dataset[_]): Unit = { if (data.schema.fieldNames.contains(rFormula.getFeaturesCol)) { val newFeaturesName = s"${Identifiable.randomUID(rFormula.getFeaturesCol)}" - logWarning(s"data containing ${rFormula.getFeaturesCol} column, " + + logInfo(s"data containing ${rFormula.getFeaturesCol} column, " + s"using new name $newFeaturesName instead") rFormula.setFeaturesCol(newFeaturesName) } + + if (rFormula.getForceIndexLabel && data.schema.fieldNames.contains(rFormula.getLabelCol)) { + val newLabelName = s"${Identifiable.randomUID(rFormula.getLabelCol)}" + logInfo(s"data containing ${rFormula.getLabelCol} column and we force to index label, " + + s"using new name $newLabelName instead") + rFormula.setLabelCol(newLabelName) + } + } + + /** + * Get the feature names and original labels from the schema + * of DataFrame transformed by RFormulaModel. + * + * @param rFormulaModel The RFormulaModel instance. + * @param data Input dataset. + * @return The feature names and original labels. 
+ */ + def getFeaturesAndLabels( + rFormulaModel: RFormulaModel, + data: Dataset[_]): (Array[String], Array[String]) = { + val schema = rFormulaModel.transform(data).schema + val featureAttrs = AttributeGroup.fromStructField(schema(rFormulaModel.getFeaturesCol)) + .attributes.get + val features = featureAttrs.map(_.name.get) + val labelAttr = Attribute.fromStructField(schema(rFormulaModel.getLabelCol)) + .asInstanceOf[NominalAttribute] + val labels = labelAttr.values.get + (features, labels) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala index 31f846dc6cfec..0b860e5af96e3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/RandomForestClassificationWrapper.scala @@ -23,10 +23,10 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.ml.{Pipeline, PipelineModel} -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier} import org.apache.spark.ml.feature.{IndexToString, RFormula} import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.r.RWrapperUtils._ import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Dataset} @@ -51,6 +51,7 @@ private[r] class RandomForestClassifierWrapper private ( pipeline.transform(dataset) .drop(PREDICTED_LABEL_INDEX_COL) .drop(rfcModel.getFeaturesCol) + .drop(rfcModel.getLabelCol) } override def write: MLWriter = new @@ -82,19 +83,11 @@ private[r] object RandomForestClassifierWrapper extends MLReadable[RandomForestC val rFormula = new RFormula() .setFormula(formula) .setForceIndexLabel(true) - RWrapperUtils.checkDataColumns(rFormula, data) + checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) - // get feature names from output schema - val schema = rFormulaModel.transform(data).schema - val featureAttrs = AttributeGroup.fromStructField(schema(rFormulaModel.getFeaturesCol)) - .attributes.get - val features = featureAttrs.map(_.name.get) - - // get label names from output schema - val labelAttr = Attribute.fromStructField(schema(rFormulaModel.getLabelCol)) - .asInstanceOf[NominalAttribute] - val labels = labelAttr.values.get + // get labels and feature names from output schema + val (features, labels) = getFeaturesAndLabels(rFormulaModel, data) // assemble and fit the pipeline val rfc = new RandomForestClassifier() @@ -111,6 +104,7 @@ private[r] object RandomForestClassifierWrapper extends MLReadable[RandomForestC .setCacheNodeIds(cacheNodeIds) .setProbabilityCol(probabilityCol) .setFeaturesCol(rFormula.getFeaturesCol) + .setLabelCol(rFormula.getLabelCol) .setPredictionCol(PREDICTED_LABEL_INDEX_COL) if (seed != null && seed.length > 0) rfc.setSeed(seed.toLong) From 12bde11ca0613dbd7d917c81a8b480d5a9355da5 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 14 Nov 2016 16:52:07 +0900 Subject: [PATCH 0098/1204] [SPARK-18382][WEBUI] "run at null:-1" in UI when no file/line info in call site info ## What changes were proposed in this pull request? Avoid reporting null/-1 file / line number in call sites if encountering StackTraceElement without this info ## How was this patch tested? Existing tests Author: Sean Owen Closes #15862 from srowen/SPARK-18382. 
(cherry picked from commit f95b124c68ccc2e318f6ac30685aa47770eea8f3) Signed-off-by: Kousuke Saruta --- core/src/main/scala/org/apache/spark/util/Utils.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 892e112e18f85..a2386d6b9e12f 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1419,8 +1419,12 @@ private[spark] object Utils extends Logging { } callStack(0) = ste.toString // Put last Spark method on top of the stack trace. } else { - firstUserLine = ste.getLineNumber - firstUserFile = ste.getFileName + if (ste.getFileName != null) { + firstUserFile = ste.getFileName + if (ste.getLineNumber >= 0) { + firstUserLine = ste.getLineNumber + } + } callStack += ste.toString insideSpark = false } From d554c02f4f50d3d58661d5f87aacf34152545c24 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Mon, 14 Nov 2016 12:08:06 +0100 Subject: [PATCH 0099/1204] [SPARK-18166][MLLIB] Fix Poisson GLM bug due to wrong requirement of response values ## What changes were proposed in this pull request? The current implementation of Poisson GLM seems to allow only positive values. This is incorrect since the support of Poisson includes the origin. The bug is easily fixed by changing the test of the Poisson variable from 'require(y **>** 0.0' to 'require(y **>=** 0.0'. mengxr srowen Author: actuaryzhang Author: actuaryzhang Closes #15683 from actuaryzhang/master. (cherry picked from commit ae6cddb78742be94aa0851ce719f293e0a64ce4f) Signed-off-by: Sean Owen --- .../GeneralizedLinearRegression.scala | 4 +- .../GeneralizedLinearRegressionSuite.scala | 45 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 1938e8ecc513d..1d2961e0277f5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -501,8 +501,8 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine val defaultLink: Link = Log override def initialize(y: Double, weight: Double): Double = { - require(y > 0.0, "The response variable of Poisson family " + - s"should be positive, but got $y") + require(y >= 0.0, "The response variable of Poisson family " + + s"should be non-negative, but got $y") y } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 111bc974642d9..6a4ac1735b2cb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -44,6 +44,7 @@ class GeneralizedLinearRegressionSuite @transient var datasetGaussianInverse: DataFrame = _ @transient var datasetBinomial: DataFrame = _ @transient var datasetPoissonLog: DataFrame = _ + @transient var datasetPoissonLogWithZero: DataFrame = _ @transient var datasetPoissonIdentity: DataFrame = _ @transient var datasetPoissonSqrt: DataFrame = _ @transient var datasetGammaInverse: DataFrame = _ @@ -88,6 +89,12 @@ class 
GeneralizedLinearRegressionSuite xVariance = Array(0.7, 1.2), nPoints = 10000, seed, noiseLevel = 0.01, family = "poisson", link = "log").toDF() + datasetPoissonLogWithZero = generateGeneralizedLinearRegressionInput( + intercept = -1.5, coefficients = Array(0.22, 0.06), xMean = Array(2.9, 10.5), + xVariance = Array(0.7, 1.2), nPoints = 100, seed, noiseLevel = 0.01, + family = "poisson", link = "log") + .map{x => LabeledPoint(if (x.label < 0.7) 0.0 else x.label, x.features)}.toDF() + datasetPoissonIdentity = generateGeneralizedLinearRegressionInput( intercept = 2.5, coefficients = Array(2.2, 0.6), xMean = Array(2.9, 10.5), xVariance = Array(0.7, 1.2), nPoints = 10000, seed, noiseLevel = 0.01, @@ -139,6 +146,10 @@ class GeneralizedLinearRegressionSuite label + "," + features.toArray.mkString(",") }.repartition(1).saveAsTextFile( "target/tmp/GeneralizedLinearRegressionSuite/datasetPoissonLog") + datasetPoissonLogWithZero.rdd.map { case Row(label: Double, features: Vector) => + label + "," + features.toArray.mkString(",") + }.repartition(1).saveAsTextFile( + "target/tmp/GeneralizedLinearRegressionSuite/datasetPoissonLogWithZero") datasetPoissonIdentity.rdd.map { case Row(label: Double, features: Vector) => label + "," + features.toArray.mkString(",") }.repartition(1).saveAsTextFile( @@ -456,6 +467,40 @@ class GeneralizedLinearRegressionSuite } } + test("generalized linear regression: poisson family against glm (with zero values)") { + /* + R code: + f1 <- data$V1 ~ data$V2 + data$V3 - 1 + f2 <- data$V1 ~ data$V2 + data$V3 + + data <- read.csv("path", header=FALSE) + for (formula in c(f1, f2)) { + model <- glm(formula, family="poisson", data=data) + print(as.vector(coef(model))) + } + [1] 0.4272661 -0.1565423 + [1] -3.6911354 0.6214301 0.1295814 + */ + val expected = Seq( + Vectors.dense(0.0, 0.4272661, -0.1565423), + Vectors.dense(-3.6911354, 0.6214301, 0.1295814)) + + import GeneralizedLinearRegression._ + + var idx = 0 + val link = "log" + val dataset = datasetPoissonLogWithZero + for (fitIntercept <- Seq(false, true)) { + val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link) + .setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction") + val model = trainer.fit(dataset) + val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1)) + assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " + + s"$link link and fitIntercept = $fitIntercept (with zero values).") + idx += 1 + } + } + test("generalized linear regression: gamma family against glm") { /* R code: From 518dc1e1e63a8955b16a3f2ca7592264fd637ae6 Mon Sep 17 00:00:00 2001 From: WangTaoTheTonic Date: Mon, 14 Nov 2016 12:22:36 +0100 Subject: [PATCH 0100/1204] [SPARK-18396][HISTORYSERVER] Duration" column makes search result confused, maybe we should make it unsearchable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? When we search data in History Server, it will check if any columns contains the search string. Duration is represented as long value in table, so if we search simple string like "003", "111", the duration containing "003", ‘111“ will be showed, which make not much sense to users. We cannot simply transfer the long value to meaning format like "1 h", "3.2 min" because they are also used for sorting. Better way to handle it is ban "Duration" columns from searching. ## How was this patch tested manually tests. 
Before("local-1478225166651" pass the filter because its duration in long value, which is "257244245" contains search string "244"): ![before](https://cloud.githubusercontent.com/assets/5276001/20203166/f851ffc6-a7ff-11e6-8fe6-91a90ca92b23.jpg) After: ![after](https://cloud.githubusercontent.com/assets/5276001/20178646/2129fbb0-a78d-11e6-9edb-39f885ce3ed0.jpg) Author: WangTaoTheTonic Closes #15838 from WangTaoTheTonic/duration. (cherry picked from commit 637a0bb88f74712001f32a53ff66fd0b8cb67e4a) Signed-off-by: Sean Owen --- .../main/resources/org/apache/spark/ui/static/historypage.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 6c0ec8d5fce54..8fd91865b0429 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -139,6 +139,9 @@ $(document).ready(function() { {name: 'eighth'}, {name: 'ninth'}, ], + "columnDefs": [ + {"searchable": false, "targets": [5]} + ], "autoWidth": false, "order": [[ 4, "desc" ]] }; From c07fe1c5924e167fb569427e5e6b78adcfde648e Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Mon, 14 Nov 2016 21:07:59 +0900 Subject: [PATCH 0101/1204] [SPARK-18432][DOC] Changed HDFS default block size from 64MB to 128MB Changed HDFS default block size from 64MB to 128MB. https://issues.apache.org/jira/browse/SPARK-18432 Author: Noritaka Sekiyama Closes #15879 from moomindani/SPARK-18432. (cherry picked from commit 9d07ceee7860921eafb55b47852f1b51089c98da) Signed-off-by: Kousuke Saruta --- docs/programming-guide.md | 6 +++--- docs/tuning.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/programming-guide.md b/docs/programming-guide.md index b9a2110b602a0..58bf17b4a84ef 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -343,7 +343,7 @@ Some notes on reading files with Spark: * All of Spark's file-based input methods, including `textFile`, support running on directories, compressed files, and wildcards as well. For example, you can use `textFile("/my/directory")`, `textFile("/my/directory/*.txt")`, and `textFile("/my/directory/*.gz")`. -* The `textFile` method also takes an optional second argument for controlling the number of partitions of the file. By default, Spark creates one partition for each block of the file (blocks being 64MB by default in HDFS), but you can also ask for a higher number of partitions by passing a larger value. Note that you cannot have fewer partitions than blocks. +* The `textFile` method also takes an optional second argument for controlling the number of partitions of the file. By default, Spark creates one partition for each block of the file (blocks being 128MB by default in HDFS), but you can also ask for a higher number of partitions by passing a larger value. Note that you cannot have fewer partitions than blocks. Apart from text files, Spark's Scala API also supports several other data formats: @@ -375,7 +375,7 @@ Some notes on reading files with Spark: * All of Spark's file-based input methods, including `textFile`, support running on directories, compressed files, and wildcards as well. For example, you can use `textFile("/my/directory")`, `textFile("/my/directory/*.txt")`, and `textFile("/my/directory/*.gz")`. -* The `textFile` method also takes an optional second argument for controlling the number of partitions of the file. 
By default, Spark creates one partition for each block of the file (blocks being 64MB by default in HDFS), but you can also ask for a higher number of partitions by passing a larger value. Note that you cannot have fewer partitions than blocks. +* The `textFile` method also takes an optional second argument for controlling the number of partitions of the file. By default, Spark creates one partition for each block of the file (blocks being 128MB by default in HDFS), but you can also ask for a higher number of partitions by passing a larger value. Note that you cannot have fewer partitions than blocks. Apart from text files, Spark's Java API also supports several other data formats: @@ -407,7 +407,7 @@ Some notes on reading files with Spark: * All of Spark's file-based input methods, including `textFile`, support running on directories, compressed files, and wildcards as well. For example, you can use `textFile("/my/directory")`, `textFile("/my/directory/*.txt")`, and `textFile("/my/directory/*.gz")`. -* The `textFile` method also takes an optional second argument for controlling the number of partitions of the file. By default, Spark creates one partition for each block of the file (blocks being 64MB by default in HDFS), but you can also ask for a higher number of partitions by passing a larger value. Note that you cannot have fewer partitions than blocks. +* The `textFile` method also takes an optional second argument for controlling the number of partitions of the file. By default, Spark creates one partition for each block of the file (blocks being 128MB by default in HDFS), but you can also ask for a higher number of partitions by passing a larger value. Note that you cannot have fewer partitions than blocks. Apart from text files, Spark's Python API also supports several other data formats: diff --git a/docs/tuning.md b/docs/tuning.md index 9c43b315bbb9e..0de303a3bd9bf 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -224,8 +224,8 @@ temporary objects created during task execution. Some steps which may be useful * As an example, if your task is reading data from HDFS, the amount of memory used by the task can be estimated using the size of the data block read from HDFS. Note that the size of a decompressed block is often 2 or 3 times the - size of the block. So if we wish to have 3 or 4 tasks' worth of working space, and the HDFS block size is 64 MB, - we can estimate size of Eden to be `4*3*64MB`. + size of the block. So if we wish to have 3 or 4 tasks' worth of working space, and the HDFS block size is 128 MB, + we can estimate size of Eden to be `4*3*128MB`. * Monitor how the frequency and time taken by garbage collection changes with the new settings. 
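As a quick illustration of the `textFile` partitioning behavior described in the documentation change above (sketch only; the HDFS path, file size, and partition counts are made-up examples, and `sc` is an existing `SparkContext`):

```scala
// Illustration only: with the 128 MB default HDFS block size, a ~1 GB file is
// split into roughly 8 partitions; a larger minPartitions requests more splits.
val byBlocks = sc.textFile("hdfs:///data/events.log")      // ~8 partitions for ~1 GB
val finer = sc.textFile("hdfs:///data/events.log", 64)     // at least 64 partitions
println(s"default: ${byBlocks.getNumPartitions}, requested: ${finer.getNumPartitions}")
```
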
From 81e36fcf17648c0929b3338502012e315239d36d Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Tue, 18 Oct 2016 10:27:54 -0700 Subject: [PATCH 0102/1204] CSD naming --- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mesos/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 36 files changed, 36 insertions(+), 36 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index ec243eaebaea7..b6495acb0a1cf 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index fcefe64d59c91..964d52b585487 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 511e1f29de368..32e57453b2bb4 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 606ad15739617..62599e6cf44ac 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 626f023a5b99c..dcc2a8a43b95e 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1c60d510e5703..3520db9e85fb0 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 45af98d94ef91..3407f88ae7834 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index eac99ab82a2e4..4fec9e10e997a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index 90bbd3fbb9404..12e10225d6b24 100644 --- 
a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 57d553b75b872..1ee1612837468 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index fb0292a5f11e0..c56477a6f7f98 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 5e9275c8e66d9..0f29f0d1785cb 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 7b68ca7373fe6..6ee9853c82094 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index 1bc206e8675f1..9ca2bb05bd023 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 4f5045326a009..085d7b7b036d0 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index ebff5fd07a9b9..ca69fcbab976a 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index c36d479007091..420d596e929bc 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index bc02b8a66246a..05016233e8126 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 91ccd4a927e98..ea4c232b8f1df 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index f7cb764463396..e2901fbd59cbe 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml 
b/external/kinesis-asl/pom.xml index 57809ff692c28..1d6b173b7c45e 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index fab409d3e9f96..22891753a1894 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 10d5ba93ebb88..75f7f804acbab 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 6023cf0771862..ef39eb58e6ccb 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/mesos/pom.xml b/mesos/pom.xml index 57cc26a4ccef9..9b9bad34d05ca 100644 --- a/mesos/pom.xml +++ b/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 8c985fd13ac06..06921d2d99609 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 4484998a49c8f..80e7c50dd94c6 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 650b4cd965b66..0ff957b980506 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 73493e600e546..9bc4d5c5d9fbd 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 82b49ebb21a44..e549b19289f1f 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7da77158ff07e..8f403874c9859 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 819897cd46858..124d560feaa9b 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 2be99cb1046f4..9bdd79f081e45 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 07a0dab0ee047..6c14e6f6aba0d 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml 
b/tools/pom.xml index b9be8db684a90..dec44d94fcfdc 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml diff --git a/yarn/pom.xml b/yarn/pom.xml index 64ff845b5ae9a..f6337a48dca6e 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT + 2.1.0-csd-1-SNAPSHOT ../pom.xml From df2168128300a626e5a0261cc7753872194c157e Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Thu, 20 Oct 2016 11:01:20 -0700 Subject: [PATCH 0103/1204] bumped jsr305 and commons-net --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 0ff957b980506..d2f77017a405d 100644 --- a/pom.xml +++ b/pom.xml @@ -174,7 +174,7 @@ 2.22.2 2.9.3 3.5.2 - 1.3.9 + 3.0.0 0.9.2 4.5.3 1.1 @@ -547,7 +547,7 @@ commons-net commons-net - 2.2 + 3.1 io.netty From d050ed3fef1fd28cfa8af0502a20a6e68aeb6b78 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Wed, 9 Nov 2016 15:50:10 -0800 Subject: [PATCH 0104/1204] csd hive --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d2f77017a405d..c44460970f5d9 100644 --- a/pom.xml +++ b/pom.xml @@ -130,7 +130,7 @@ 2.4.0 org.spark-project.hive - 1.2.1.spark2 + 1.2.1.spark2-csd-1 1.2.1 10.12.1.1 From 3c623d226a0c495c36c86d199879b9e922d1ece2 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Mon, 14 Nov 2016 10:03:01 -0800 Subject: [PATCH 0105/1204] [SPARK-18416][STRUCTURED STREAMING] Fixed temp file leak in state store ## What changes were proposed in this pull request? StateStore.get() causes temporary files to be created immediately, even if the store is not used to make updates for new version. The temp file is not closed as store.commit() is not called in those cases, thus keeping the output stream to temp file open forever. This PR fixes it by opening the temp file only when there are updates being made. ## How was this patch tested? New unit test Author: Tathagata Das Closes #15859 from tdas/SPARK-18416. 
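The heart of the fix, visible in the diff below, is making the temp-file stream a `lazy val` so it is only created when the store is actually updated. A minimal standalone sketch of that pattern, using hypothetical names rather than the actual Spark classes:

```scala
import java.io.{File, FileOutputStream}

// Sketch only: the stream, and therefore the temp file, is created on the first
// write rather than at construction, so a store that is never updated leaves
// nothing behind on disk.
class TempFileHolder(dir: File) {
  private val tempFile = new File(dir, s"temp-${scala.util.Random.nextLong()}")
  private lazy val out = new FileOutputStream(tempFile) // file created on first use

  def put(bytes: Array[Byte]): Unit = out.write(bytes)  // first call opens the file
  def commit(): Unit = out.close()
}
```
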
(cherry picked from commit bdfe60ac921172be0fb77de2f075cc7904a3b238) Signed-off-by: Shixiong Zhu --- .../state/HDFSBackedStateStoreProvider.scala | 10 +-- .../streaming/state/StateStoreSuite.scala | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 808713161c316..f07feaad5dc71 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -87,8 +87,7 @@ private[state] class HDFSBackedStateStoreProvider( private val newVersion = version + 1 private val tempDeltaFile = new Path(baseDir, s"temp-${Random.nextLong}") - private val tempDeltaFileStream = compressStream(fs.create(tempDeltaFile, true)) - + private lazy val tempDeltaFileStream = compressStream(fs.create(tempDeltaFile, true)) private val allUpdates = new java.util.HashMap[UnsafeRow, StoreUpdate]() @volatile private var state: STATE = UPDATING @@ -101,7 +100,7 @@ private[state] class HDFSBackedStateStoreProvider( } override def put(key: UnsafeRow, value: UnsafeRow): Unit = { - verify(state == UPDATING, "Cannot remove after already committed or aborted") + verify(state == UPDATING, "Cannot put after already committed or aborted") val isNewKey = !mapToUpdate.containsKey(key) mapToUpdate.put(key, value) @@ -125,6 +124,7 @@ private[state] class HDFSBackedStateStoreProvider( /** Remove keys that match the following condition */ override def remove(condition: UnsafeRow => Boolean): Unit = { verify(state == UPDATING, "Cannot remove after already committed or aborted") + val keyIter = mapToUpdate.keySet().iterator() while (keyIter.hasNext) { val key = keyIter.next @@ -154,7 +154,7 @@ private[state] class HDFSBackedStateStoreProvider( finalizeDeltaFile(tempDeltaFileStream) finalDeltaFile = commitUpdates(newVersion, mapToUpdate, tempDeltaFile) state = COMMITTED - logInfo(s"Committed version $newVersion for $this") + logInfo(s"Committed version $newVersion for $this to file $finalDeltaFile") newVersion } catch { case NonFatal(e) => @@ -174,7 +174,7 @@ private[state] class HDFSBackedStateStoreProvider( if (tempDeltaFile != null) { fs.delete(tempDeltaFile, true) } - logInfo("Aborted") + logInfo(s"Aborted version $newVersion for $this") } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 504a26516107f..533cd0cd2a2ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -468,6 +468,69 @@ class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMeth assert(e.getCause.getMessage.contains("Failed to rename")) } + test("SPARK-18416: do not create temp delta file until the store is updated") { + val dir = Utils.createDirectory(tempDir, Random.nextString(5)).toString + val storeId = StateStoreId(dir, 0, 0) + val storeConf = StateStoreConf.empty + val hadoopConf = new Configuration() + val deltaFileDir = new File(s"$dir/0/0/") + + def numTempFiles: Int = { + if (deltaFileDir.exists) { + 
deltaFileDir.listFiles.map(_.getName).count(n => n.contains("temp") && !n.startsWith(".")) + } else 0 + } + + def numDeltaFiles: Int = { + if (deltaFileDir.exists) { + deltaFileDir.listFiles.map(_.getName).count(n => n.contains(".delta") && !n.startsWith(".")) + } else 0 + } + + def shouldNotCreateTempFile[T](body: => T): T = { + val before = numTempFiles + val result = body + assert(numTempFiles === before) + result + } + + // Getting the store should not create temp file + val store0 = shouldNotCreateTempFile { + StateStore.get(storeId, keySchema, valueSchema, 0, storeConf, hadoopConf) + } + + // Put should create a temp file + put(store0, "a", 1) + assert(numTempFiles === 1) + assert(numDeltaFiles === 0) + + // Commit should remove temp file and create a delta file + store0.commit() + assert(numTempFiles === 0) + assert(numDeltaFiles === 1) + + // Remove should create a temp file + val store1 = shouldNotCreateTempFile { + StateStore.get(storeId, keySchema, valueSchema, 1, storeConf, hadoopConf) + } + remove(store1, _ == "a") + assert(numTempFiles === 1) + assert(numDeltaFiles === 1) + + // Commit should remove temp file and create a delta file + store1.commit() + assert(numTempFiles === 0) + assert(numDeltaFiles === 2) + + // Commit without any updates should create a delta file + val store2 = shouldNotCreateTempFile { + StateStore.get(storeId, keySchema, valueSchema, 2, storeConf, hadoopConf) + } + store2.commit() + assert(numTempFiles === 0) + assert(numDeltaFiles === 3) + } + def getDataFromFiles( provider: HDFSBackedStateStoreProvider, version: Int = -1): Set[(String, Int)] = { From db691f05cec9e03f507c5ed544bcc6edefb3842d Mon Sep 17 00:00:00 2001 From: cody koeninger Date: Mon, 14 Nov 2016 11:10:37 -0800 Subject: [PATCH 0106/1204] [SPARK-17510][STREAMING][KAFKA] config max rate on a per-partition basis ## What changes were proposed in this pull request? Allow configuration of max rate on a per-topicpartition basis. ## How was this patch tested? Unit tests. The reporter (Jeff Nadler) said he could test on his workload, so let's wait on that report. Author: cody koeninger Closes #15132 from koeninger/SPARK-17510. 
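Based on the `PerPartitionConfig` hook this patch introduces, a caller could cap one hot partition differently from the rest roughly as follows (sketch only; the topic name and rates are placeholders, and `ssc` (a `StreamingContext`) and `kafkaParams` (a `Map[String, Object]`) are assumed to exist):

```scala
import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.kafka010._

// Cap a known hot partition at a higher rate than the rest (records per second).
val perPartitionRates = new PerPartitionConfig {
  override def maxRatePerPartition(tp: TopicPartition): Long =
    if (tp.topic == "clicks" && tp.partition == 0) 10000L else 1000L
}

val stream = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferConsistent,
  ConsumerStrategies.Subscribe[String, String](Seq("clicks"), kafkaParams),
  perPartitionRates)
```
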
(cherry picked from commit 89d1fa58dbe88560b1f2b0362fcc3035ccc888be) Signed-off-by: Shixiong Zhu --- .../kafka010/DirectKafkaInputDStream.scala | 11 ++-- .../spark/streaming/kafka010/KafkaUtils.scala | 53 ++++++++++++++++++- .../kafka010/PerPartitionConfig.scala | 47 ++++++++++++++++ .../kafka010/DirectKafkaStreamSuite.scala | 34 ++++++++---- .../kafka/DirectKafkaInputDStream.scala | 4 +- 5 files changed, 131 insertions(+), 18 deletions(-) create mode 100644 external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/PerPartitionConfig.scala diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala index 7e57bb18cbd50..794f53c5abfd0 100644 --- a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala @@ -57,7 +57,8 @@ import org.apache.spark.streaming.scheduler.rate.RateEstimator private[spark] class DirectKafkaInputDStream[K, V]( _ssc: StreamingContext, locationStrategy: LocationStrategy, - consumerStrategy: ConsumerStrategy[K, V] + consumerStrategy: ConsumerStrategy[K, V], + ppc: PerPartitionConfig ) extends InputDStream[ConsumerRecord[K, V]](_ssc) with Logging with CanCommitOffsets { val executorKafkaParams = { @@ -128,12 +129,9 @@ private[spark] class DirectKafkaInputDStream[K, V]( } } - private val maxRateLimitPerPartition: Int = context.sparkContext.getConf.getInt( - "spark.streaming.kafka.maxRatePerPartition", 0) - protected[streaming] def maxMessagesPerPartition( offsets: Map[TopicPartition, Long]): Option[Map[TopicPartition, Long]] = { - val estimatedRateLimit = rateController.map(_.getLatestRate().toInt) + val estimatedRateLimit = rateController.map(_.getLatestRate()) // calculate a per-partition rate limit based on current lag val effectiveRateLimitPerPartition = estimatedRateLimit.filter(_ > 0) match { @@ -144,11 +142,12 @@ private[spark] class DirectKafkaInputDStream[K, V]( val totalLag = lagPerPartition.values.sum lagPerPartition.map { case (tp, lag) => + val maxRateLimitPerPartition = ppc.maxRatePerPartition(tp) val backpressureRate = Math.round(lag / totalLag.toFloat * rate) tp -> (if (maxRateLimitPerPartition > 0) { Math.min(backpressureRate, maxRateLimitPerPartition)} else backpressureRate) } - case None => offsets.map { case (tp, offset) => tp -> maxRateLimitPerPartition } + case None => offsets.map { case (tp, offset) => tp -> ppc.maxRatePerPartition(tp) } } if (effectiveRateLimitPerPartition.values.sum > 0) { diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala index b2190bfa05a3a..c11917f59d5b8 100644 --- a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala @@ -123,7 +123,31 @@ object KafkaUtils extends Logging { locationStrategy: LocationStrategy, consumerStrategy: ConsumerStrategy[K, V] ): InputDStream[ConsumerRecord[K, V]] = { - new DirectKafkaInputDStream[K, V](ssc, locationStrategy, consumerStrategy) + val ppc = new DefaultPerPartitionConfig(ssc.sparkContext.getConf) + createDirectStream[K, V](ssc, locationStrategy, consumerStrategy, ppc) + } + + /** + * :: Experimental :: + * Scala 
constructor for a DStream where + * each given Kafka topic/partition corresponds to an RDD partition. + * @param locationStrategy In most cases, pass in LocationStrategies.preferConsistent, + * see [[LocationStrategies]] for more details. + * @param consumerStrategy In most cases, pass in ConsumerStrategies.subscribe, + * see [[ConsumerStrategies]] for more details. + * @param perPartitionConfig configuration of settings such as max rate on a per-partition basis. + * see [[PerPartitionConfig]] for more details. + * @tparam K type of Kafka message key + * @tparam V type of Kafka message value + */ + @Experimental + def createDirectStream[K, V]( + ssc: StreamingContext, + locationStrategy: LocationStrategy, + consumerStrategy: ConsumerStrategy[K, V], + perPartitionConfig: PerPartitionConfig + ): InputDStream[ConsumerRecord[K, V]] = { + new DirectKafkaInputDStream[K, V](ssc, locationStrategy, consumerStrategy, perPartitionConfig) } /** @@ -150,6 +174,33 @@ object KafkaUtils extends Logging { jssc.ssc, locationStrategy, consumerStrategy)) } + /** + * :: Experimental :: + * Java constructor for a DStream where + * each given Kafka topic/partition corresponds to an RDD partition. + * @param keyClass Class of the keys in the Kafka records + * @param valueClass Class of the values in the Kafka records + * @param locationStrategy In most cases, pass in LocationStrategies.preferConsistent, + * see [[LocationStrategies]] for more details. + * @param consumerStrategy In most cases, pass in ConsumerStrategies.subscribe, + * see [[ConsumerStrategies]] for more details + * @param perPartitionConfig configuration of settings such as max rate on a per-partition basis. + * see [[PerPartitionConfig]] for more details. + * @tparam K type of Kafka message key + * @tparam V type of Kafka message value + */ + @Experimental + def createDirectStream[K, V]( + jssc: JavaStreamingContext, + locationStrategy: LocationStrategy, + consumerStrategy: ConsumerStrategy[K, V], + perPartitionConfig: PerPartitionConfig + ): JavaInputDStream[ConsumerRecord[K, V]] = { + new JavaInputDStream( + createDirectStream[K, V]( + jssc.ssc, locationStrategy, consumerStrategy, perPartitionConfig)) + } + /** * Tweak kafka params to prevent issues on executors */ diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/PerPartitionConfig.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/PerPartitionConfig.scala new file mode 100644 index 0000000000000..4792f2a955110 --- /dev/null +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/PerPartitionConfig.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.streaming.kafka010 + +import org.apache.kafka.common.TopicPartition + +import org.apache.spark.SparkConf +import org.apache.spark.annotation.Experimental + +/** + * :: Experimental :: + * Interface for user-supplied configurations that can't otherwise be set via Spark properties, + * because they need tweaking on a per-partition basis, + */ +@Experimental +abstract class PerPartitionConfig extends Serializable { + /** + * Maximum rate (number of records per second) at which data will be read + * from each Kafka partition. + */ + def maxRatePerPartition(topicPartition: TopicPartition): Long +} + +/** + * Default per-partition configuration + */ +private class DefaultPerPartitionConfig(conf: SparkConf) + extends PerPartitionConfig { + val maxRate = conf.getLong("spark.streaming.kafka.maxRatePerPartition", 0) + + def maxRatePerPartition(topicPartition: TopicPartition): Long = maxRate +} diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index 02aec43c3b34f..f36e0a901f7b0 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -252,7 +252,8 @@ class DirectKafkaStreamSuite val s = new DirectKafkaInputDStream[String, String]( ssc, preferredHosts, - ConsumerStrategies.Subscribe[String, String](List(topic), kafkaParams.asScala)) + ConsumerStrategies.Subscribe[String, String](List(topic), kafkaParams.asScala), + new DefaultPerPartitionConfig(sparkConf)) s.consumer.poll(0) assert( s.consumer.position(topicPartition) >= offsetBeforeStart, @@ -306,7 +307,8 @@ class DirectKafkaStreamSuite ConsumerStrategies.Assign[String, String]( List(topicPartition), kafkaParams.asScala, - Map(topicPartition -> 11L))) + Map(topicPartition -> 11L)), + new DefaultPerPartitionConfig(sparkConf)) s.consumer.poll(0) assert( s.consumer.position(topicPartition) >= offsetBeforeStart, @@ -518,7 +520,7 @@ class DirectKafkaStreamSuite test("maxMessagesPerPartition with backpressure disabled") { val topic = "maxMessagesPerPartition" - val kafkaStream = getDirectKafkaStream(topic, None) + val kafkaStream = getDirectKafkaStream(topic, None, None) val input = Map(new TopicPartition(topic, 0) -> 50L, new TopicPartition(topic, 1) -> 50L) assert(kafkaStream.maxMessagesPerPartition(input).get == @@ -528,7 +530,7 @@ class DirectKafkaStreamSuite test("maxMessagesPerPartition with no lag") { val topic = "maxMessagesPerPartition" val rateController = Some(new ConstantRateController(0, new ConstantEstimator(100), 100)) - val kafkaStream = getDirectKafkaStream(topic, rateController) + val kafkaStream = getDirectKafkaStream(topic, rateController, None) val input = Map(new TopicPartition(topic, 0) -> 0L, new TopicPartition(topic, 1) -> 0L) assert(kafkaStream.maxMessagesPerPartition(input).isEmpty) @@ -537,11 +539,19 @@ class DirectKafkaStreamSuite test("maxMessagesPerPartition respects max rate") { val topic = "maxMessagesPerPartition" val rateController = Some(new ConstantRateController(0, new ConstantEstimator(100), 1000)) - val kafkaStream = getDirectKafkaStream(topic, rateController) + val ppc = Some(new PerPartitionConfig { + def maxRatePerPartition(tp: TopicPartition) = + if (tp.topic == topic && tp.partition == 0) { + 50 + } else { + 100 + } + }) + val kafkaStream = 
getDirectKafkaStream(topic, rateController, ppc) val input = Map(new TopicPartition(topic, 0) -> 1000L, new TopicPartition(topic, 1) -> 1000L) assert(kafkaStream.maxMessagesPerPartition(input).get == - Map(new TopicPartition(topic, 0) -> 10L, new TopicPartition(topic, 1) -> 10L)) + Map(new TopicPartition(topic, 0) -> 5L, new TopicPartition(topic, 1) -> 10L)) } test("using rate controller") { @@ -570,7 +580,9 @@ class DirectKafkaStreamSuite new DirectKafkaInputDStream[String, String]( ssc, preferredHosts, - ConsumerStrategies.Subscribe[String, String](List(topic), kafkaParams.asScala)) { + ConsumerStrategies.Subscribe[String, String](List(topic), kafkaParams.asScala), + new DefaultPerPartitionConfig(sparkConf) + ) { override protected[streaming] val rateController = Some(new DirectKafkaRateController(id, estimator)) }.map(r => (r.key, r.value)) @@ -616,7 +628,10 @@ class DirectKafkaStreamSuite }.toSeq.sortBy { _._1 } } - private def getDirectKafkaStream(topic: String, mockRateController: Option[RateController]) = { + private def getDirectKafkaStream( + topic: String, + mockRateController: Option[RateController], + ppc: Option[PerPartitionConfig]) = { val batchIntervalMilliseconds = 100 val sparkConf = new SparkConf() @@ -643,7 +658,8 @@ class DirectKafkaStreamSuite tps.foreach(tp => consumer.seek(tp, 0)) consumer } - } + }, + ppc.getOrElse(new DefaultPerPartitionConfig(sparkConf)) ) { override protected[streaming] val rateController = mockRateController } diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala index c3c799375bbeb..d52c230eb7849 100644 --- a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala +++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala @@ -88,12 +88,12 @@ class DirectKafkaInputDStream[ protected val kc = new KafkaCluster(kafkaParams) - private val maxRateLimitPerPartition: Int = context.sparkContext.getConf.getInt( + private val maxRateLimitPerPartition: Long = context.sparkContext.getConf.getLong( "spark.streaming.kafka.maxRatePerPartition", 0) protected[streaming] def maxMessagesPerPartition( offsets: Map[TopicAndPartition, Long]): Option[Map[TopicAndPartition, Long]] = { - val estimatedRateLimit = rateController.map(_.getLatestRate().toInt) + val estimatedRateLimit = rateController.map(_.getLatestRate()) // calculate a per-partition rate limit based on current lag val effectiveRateLimitPerPartition = estimatedRateLimit.filter(_ > 0) match { From cff7a70b59c3ac2cb1fab2216e9e6dcf2a6ac89a Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Mon, 14 Nov 2016 19:42:00 +0000 Subject: [PATCH 0107/1204] [SPARK-11496][GRAPHX][FOLLOWUP] Add param checking for runParallelPersonalizedPageRank ## What changes were proposed in this pull request? add the param checking to keep in line with other algos ## How was this patch tested? existing tests Author: Zheng RuiFeng Closes #15876 from zhengruifeng/param_check_runParallelPersonalizedPageRank. 
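The new checks use GraphX's usual `require`-style validation; a simplified sketch of the pattern with a toy method (not the real GraphX signature):

```scala
// Illustration only: bad arguments now fail fast with IllegalArgumentException
// instead of surfacing as a confusing failure deep inside the algorithm.
def runToyPageRank(numIter: Int, resetProb: Double, sources: Array[Long]): Unit = {
  require(numIter > 0, s"Number of iterations must be greater than 0, but got $numIter")
  require(resetProb >= 0 && resetProb <= 1,
    s"Random reset probability must belong to [0, 1], but got $resetProb")
  require(sources.nonEmpty, "The list of sources must be non-empty")
  // ... run the algorithm ...
}

runToyPageRank(numIter = 10, resetProb = 0.15, sources = Array(1L, 2L)) // ok
// runToyPageRank(0, 0.15, Array(1L)) would throw IllegalArgumentException
```
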
(cherry picked from commit 75934457d75996be71ffd0d4b448497d656c0d40) Signed-off-by: DB Tsai --- .../main/scala/org/apache/spark/graphx/lib/PageRank.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index f4b00757a8b54..c0c3c73463aab 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -185,6 +185,13 @@ object PageRank extends Logging { def runParallelPersonalizedPageRank[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED], numIter: Int, resetProb: Double = 0.15, sources: Array[VertexId]): Graph[Vector, Double] = { + require(numIter > 0, s"Number of iterations must be greater than 0," + + s" but got ${numIter}") + require(resetProb >= 0 && resetProb <= 1, s"Random reset probability must belong" + + s" to [0, 1], but got ${resetProb}") + require(sources.nonEmpty, s"The list of sources must be non-empty," + + s" but got ${sources.mkString("[", ",", "]")}") + // TODO if one sources vertex id is outside of the int range // we won't be able to store its activations in a sparse vector val zero = Vectors.sparse(sources.size, List()).asBreeze From ae66799feec895751f49418885da58f35fc2aaa6 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Mon, 14 Nov 2016 20:59:15 +0100 Subject: [PATCH 0108/1204] [SPARK-17348][SQL] Incorrect results from subquery transformation ## What changes were proposed in this pull request? Return an AnalysisException when there is a correlated non-equality predicate in a subquery and the correlated column from the outer reference is not from the immediate parent operator of the subquery. This PR prevents incorrect results from the subquery transformation in such cases. Test cases, both positive and negative, are added. ## How was this patch tested? sql/test, catalyst/test, hive/test, and scenarios that will produce incorrect results without this PR and produce correct results when the subquery transformation does happen. Author: Nattavut Sutyanyong Closes #15763 from nsyca/spark-17348. (cherry picked from commit bd85603ba5f9e61e1aa8326d3e4d5703b5977a4c) Signed-off-by: Herman van Hovell --- .../sql/catalyst/analysis/Analyzer.scala | 44 +++++++++ .../sql/catalyst/analysis/CheckAnalysis.scala | 7 -- .../org/apache/spark/sql/SubquerySuite.scala | 95 ++++++++++++++++++- 3 files changed, 137 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8dbec408002f1..dcee2e4b1fe73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -972,6 +972,37 @@ class Analyzer( } } + // SPARK-17348: A potential incorrect result case. + // When a correlated predicate is a non-equality predicate, + // certain operators are not permitted from the operator + // hosting the correlated predicate up to the operator on the outer table. + // Otherwise, the pull up of the correlated predicate + // will generate a plan with a different semantics + // which could return incorrect result. + // Currently we check for Aggregate and Window operators + // + // Below shows an example of a Logical Plan during Analyzer phase that + // show this problem.
Pulling the correlated predicate [outer(c2#77) >= ..] + // through the Aggregate (or Window) operator could alter the result of + // the Aggregate. + // + // Project [c1#76] + // +- Project [c1#87, c2#88] + // : (Aggregate or Window operator) + // : +- Filter [outer(c2#77) >= c2#88)] + // : +- SubqueryAlias t2, `t2` + // : +- Project [_1#84 AS c1#87, _2#85 AS c2#88] + // : +- LocalRelation [_1#84, _2#85] + // +- SubqueryAlias t1, `t1` + // +- Project [_1#73 AS c1#76, _2#74 AS c2#77] + // +- LocalRelation [_1#73, _2#74] + def failOnNonEqualCorrelatedPredicate(found: Boolean, p: LogicalPlan): Unit = { + if (found) { + // Report a non-supported case as an exception + failAnalysis(s"Correlated column is not allowed in a non-equality predicate:\n$p") + } + } + /** Determine which correlated predicate references are missing from this plan. */ def missingReferences(p: LogicalPlan): AttributeSet = { val localPredicateReferences = p.collect(predicateMap) @@ -982,12 +1013,20 @@ class Analyzer( localPredicateReferences -- p.outputSet } + var foundNonEqualCorrelatedPred : Boolean = false + // Simplify the predicates before pulling them out. val transformed = BooleanSimplification(sub) transformUp { case f @ Filter(cond, child) => // Find all predicates with an outer reference. val (correlated, local) = splitConjunctivePredicates(cond).partition(containsOuter) + // Find any non-equality correlated predicates + foundNonEqualCorrelatedPred = foundNonEqualCorrelatedPred || correlated.exists { + case _: EqualTo | _: EqualNullSafe => false + case _ => true + } + // Rewrite the filter without the correlated predicates if any. correlated match { case Nil => f @@ -1009,12 +1048,17 @@ class Analyzer( } case a @ Aggregate(grouping, expressions, child) => failOnOuterReference(a) + failOnNonEqualCorrelatedPredicate(foundNonEqualCorrelatedPred, a) + val referencesToAdd = missingReferences(a) if (referencesToAdd.nonEmpty) { Aggregate(grouping ++ referencesToAdd, expressions ++ referencesToAdd, child) } else { a } + case w : Window => + failOnNonEqualCorrelatedPredicate(foundNonEqualCorrelatedPred, w) + w case j @ Join(left, _, RightOuter, _) => failOnOuterReference(j) failOnOuterReferenceInSubTree(left, "a RIGHT OUTER JOIN") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 3455a567b7786..7b75c1f70974b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -119,13 +119,6 @@ trait CheckAnalysis extends PredicateHelper { } case s @ ScalarSubquery(query, conditions, _) if conditions.nonEmpty => - // Make sure we are using equi-joins. - conditions.foreach { - case _: EqualTo | _: EqualNullSafe => // ok - case e => failAnalysis( - s"The correlated scalar subquery can only contain equality predicates: $e") - } - // Make sure correlated scalar subqueries contain one row for every outer row by // enforcing that they are aggregates which contain exactly one aggregate expressions. 
// The analyzer has already checked that subquery contained only one output column, and diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 89348668340be..c84a6f161893c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -498,10 +498,10 @@ class SubquerySuite extends QueryTest with SharedSQLContext { test("non-equal correlated scalar subquery") { val msg1 = intercept[AnalysisException] { - sql("select a, (select b from l l2 where l2.a < l1.a) sum_b from l l1") + sql("select a, (select sum(b) from l l2 where l2.a < l1.a) sum_b from l l1") } assert(msg1.getMessage.contains( - "The correlated scalar subquery can only contain equality predicates")) + "Correlated column is not allowed in a non-equality predicate:")) } test("disjunctive correlated scalar subquery") { @@ -639,6 +639,97 @@ class SubquerySuite extends QueryTest with SharedSQLContext { | from t1 left join t2 on t1.c1=t2.c2) t3 | where c3 not in (select c2 from t2)""".stripMargin), Row(2) :: Nil) + } + } + + test("SPARK-17348: Correlated subqueries with non-equality predicate (good case)") { + withTempView("t1", "t2") { + Seq((1, 1)).toDF("c1", "c2").createOrReplaceTempView("t1") + Seq((1, 1), (2, 0)).toDF("c1", "c2").createOrReplaceTempView("t2") + + // Simple case + checkAnswer( + sql( + """ + | select c1 + | from t1 + | where c1 in (select t2.c1 + | from t2 + | where t1.c2 >= t2.c2)""".stripMargin), + Row(1) :: Nil) + + // More complex case with OR predicate + checkAnswer( + sql( + """ + | select t1.c1 + | from t1, t1 as t3 + | where t1.c1 = t3.c1 + | and (t1.c1 in (select t2.c1 + | from t2 + | where t1.c2 >= t2.c2 + | or t3.c2 < t2.c2) + | or t1.c2 >= 0)""".stripMargin), + Row(1) :: Nil) + } + } + + test("SPARK-17348: Correlated subqueries with non-equality predicate (error case)") { + withTempView("t1", "t2", "t3", "t4") { + Seq((1, 1)).toDF("c1", "c2").createOrReplaceTempView("t1") + Seq((1, 1), (2, 0)).toDF("c1", "c2").createOrReplaceTempView("t2") + Seq((2, 1)).toDF("c1", "c2").createOrReplaceTempView("t3") + Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t4") + + // Simplest case + intercept[AnalysisException] { + sql( + """ + | select t1.c1 + | from t1 + | where t1.c1 in (select max(t2.c1) + | from t2 + | where t1.c2 >= t2.c2)""".stripMargin).collect() + } + + // Add a HAVING on top and augmented within an OR predicate + intercept[AnalysisException] { + sql( + """ + | select t1.c1 + | from t1 + | where t1.c1 in (select max(t2.c1) + | from t2 + | where t1.c2 >= t2.c2 + | having count(*) > 0 ) + | or t1.c2 >= 0""".stripMargin).collect() + } + + // Add a HAVING on top and augmented within an OR predicate + intercept[AnalysisException] { + sql( + """ + | select t1.c1 + | from t1, t1 as t3 + | where t1.c1 = t3.c1 + | and (t1.c1 in (select max(t2.c1) + | from t2 + | where t1.c2 = t2.c2 + | or t3.c2 = t2.c2) + | )""".stripMargin).collect() + } + + // In Window expression: changing the data set to + // demonstrate if this query ran, it would return incorrect result. 
+ intercept[AnalysisException] { + sql( + """ + | select c1 + | from t3 + | where c1 in (select max(t4.c1) over () + | from t4 + | where t3.c2 >= t4.c2)""".stripMargin).collect() + } } } } From 27999b3661481c0232135dbe021787afe963d812 Mon Sep 17 00:00:00 2001 From: Michael Armbrust Date: Mon, 14 Nov 2016 16:46:26 -0800 Subject: [PATCH 0109/1204] [SPARK-18124] Observed delay based Event Time Watermarks This PR adds a new method `withWatermark` to the `Dataset` API, which can be used specify an _event time watermark_. An event time watermark allows the streaming engine to reason about the point in time after which we no longer expect to see late data. This PR also has augmented `StreamExecution` to use this watermark for several purposes: - To know when a given time window aggregation is finalized and thus results can be emitted when using output modes that do not allow updates (e.g. `Append` mode). - To minimize the amount of state that we need to keep for on-going aggregations, by evicting state for groups that are no longer expected to change. Although, we do still maintain all state if the query requires (i.e. if the event time is not present in the `groupBy` or when running in `Complete` mode). An example that emits windowed counts of records, waiting up to 5 minutes for late data to arrive. ```scala df.withWatermark("eventTime", "5 minutes") .groupBy(window($"eventTime", "1 minute") as 'window) .count() .writeStream .format("console") .mode("append") // In append mode, we only output finalized aggregations. .start() ``` ### Calculating the watermark. The current event time is computed by looking at the `MAX(eventTime)` seen this epoch across all of the partitions in the query minus some user defined _delayThreshold_. An additional constraint is that the watermark must increase monotonically. Note that since we must coordinate this value across partitions occasionally, the actual watermark used is only guaranteed to be at least `delay` behind the actual event time. In some cases we may still process records that arrive more than delay late. This mechanism was chosen for the initial implementation over processing time for two reasons: - it is robust to downtime that could affect processing delay - it does not require syncing of time or timezones between the producer and the processing engine. ### Other notable implementation details - A new trigger metric `eventTimeWatermark` outputs the current value of the watermark. - We mark the event time column in the `Attribute` metadata using the key `spark.watermarkDelay`. This allows downstream operations to know which column holds the event time. Operations like `window` propagate this metadata. - `explain()` marks the watermark with a suffix of `-T${delayMs}` to ease debugging of how this information is propagated. - Currently, we don't filter out late records, but instead rely on the state store to avoid emitting records that are both added and filtered in the same epoch. ### Remaining in this PR - [ ] The test for recovery is currently failing as we don't record the watermark used in the offset log. We will need to do so to ensure determinism, but this is deferred until #15626 is merged. ### Other follow-ups There are some natural additional features that we should consider for future work: - Ability to write records that arrive too late to some external store in case any out-of-band remediation is required. - `Update` mode so you can get partial results before a group is evicted. - Other mechanisms for calculating the watermark. 
In particular a watermark based on quantiles would be more robust to outliers. Author: Michael Armbrust Closes #15702 from marmbrus/watermarks. (cherry picked from commit c07187823a98f0d1a0f58c06e28a27e1abed157a) Signed-off-by: Tathagata Das --- .../spark/unsafe/types/CalendarInterval.java | 4 + .../apache/spark/sql/AnalysisException.scala | 3 +- .../sql/catalyst/analysis/Analyzer.scala | 8 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 10 + .../UnsupportedOperationChecker.scala | 18 +- .../sql/catalyst/analysis/unresolved.scala | 3 +- .../expressions/namedExpressions.scala | 17 +- .../plans/logical/EventTimeWatermark.scala | 51 +++++ .../scala/org/apache/spark/sql/Dataset.scala | 40 +++- .../spark/sql/execution/SparkStrategies.scala | 12 +- .../sql/execution/aggregate/AggUtils.scala | 9 +- .../sql/execution/command/commands.scala | 2 +- .../streaming/EventTimeWatermarkExec.scala | 93 +++++++++ .../sql/execution/streaming/ForeachSink.scala | 3 +- .../streaming/IncrementalExecution.scala | 12 +- .../streaming/StatefulAggregate.scala | 170 +++++++++------- .../execution/streaming/StreamExecution.scala | 25 ++- .../execution/streaming/StreamMetrics.scala | 1 + .../state/HDFSBackedStateStoreProvider.scala | 23 ++- .../streaming/state/StateStore.scala | 7 +- .../streaming/state/StateStoreSuite.scala | 6 +- .../spark/sql/streaming/WatermarkSuite.scala | 191 ++++++++++++++++++ 22 files changed, 597 insertions(+), 111 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/WatermarkSuite.scala diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java index 518ed6470a753..a7b0e6f80c2b6 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java @@ -252,6 +252,10 @@ public static long parseSecondNano(String secondNano) throws IllegalArgumentExce public final int months; public final long microseconds; + public final long milliseconds() { + return this.microseconds / MICROS_PER_MILLI; + } + public CalendarInterval(int months, long microseconds) { this.months = months; this.microseconds = microseconds; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala index 7defb9df862c0..ff8576157305b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala @@ -31,7 +31,8 @@ class AnalysisException protected[sql] ( val message: String, val line: Option[Int] = None, val startPosition: Option[Int] = None, - val plan: Option[LogicalPlan] = None, + // Some plans fail to serialize due to bugs in scala collections. 
+ @transient val plan: Option[LogicalPlan] = None, val cause: Option[Throwable] = None) extends Exception(message, cause.orNull) with Serializable { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index dcee2e4b1fe73..b7e167557c559 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2213,7 +2213,13 @@ object TimeWindowing extends Rule[LogicalPlan] { windowExpressions.head.timeColumn.resolved && windowExpressions.head.checkInputDataTypes().isSuccess) { val window = windowExpressions.head - val windowAttr = AttributeReference("window", window.dataType)() + + val metadata = window.timeColumn match { + case a: Attribute => a.metadata + case _ => Metadata.empty + } + val windowAttr = + AttributeReference("window", window.dataType, metadata = metadata)() val maxNumOverlapping = math.ceil(window.windowDuration * 1.0 / window.slideDuration).toInt val windows = Seq.tabulate(maxNumOverlapping + 1) { i => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 7b75c1f70974b..98e50d0d3c674 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -148,6 +148,16 @@ trait CheckAnalysis extends PredicateHelper { } operator match { + case etw: EventTimeWatermark => + etw.eventTime.dataType match { + case s: StructType + if s.find(_.name == "end").map(_.dataType) == Some(TimestampType) => + case _: TimestampType => + case _ => + failAnalysis( + s"Event time must be defined on a window or a timestamp, but " + + s"${etw.eventTime.name} is of type ${etw.eventTime.dataType.simpleString}") + } case f: Filter if f.condition.dataType != BooleanType => failAnalysis( s"filter expression '${f.condition.sql}' " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index e81370c504abb..c054fcbef36f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.{AnalysisException, InternalOutputModes} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.streaming.OutputMode @@ -55,9 +56,20 @@ object UnsupportedOperationChecker { // Disallow some output mode outputMode match { case InternalOutputModes.Append if aggregates.nonEmpty => - throwError( - s"$outputMode output mode not supported when there are streaming aggregations on " + - s"streaming DataFrames/DataSets")(plan) + val aggregate = aggregates.head + + // Find any attributes that are associated with an eventTime watermark. 
+ val watermarkAttributes = aggregate.groupingExpressions.collect { + case a: Attribute if a.metadata.contains(EventTimeWatermark.delayKey) => a + } + + // We can append rows to the sink once the group is under the watermark. Without this + // watermark a group is never "finished" so we would never output anything. + if (watermarkAttributes.isEmpty) { + throwError( + s"$outputMode output mode not supported when there are streaming aggregations on " + + s"streaming DataFrames/DataSets")(plan) + } case InternalOutputModes.Complete | InternalOutputModes.Update if aggregates.isEmpty => throwError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 235ae04782455..36ed9ba50372b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, Codege import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.quoteIdentifier -import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.types.{DataType, Metadata, StructType} /** * Thrown when an invalid attempt is made to access a property of a tree that has yet to be fully @@ -98,6 +98,7 @@ case class UnresolvedAttribute(nameParts: Seq[String]) extends Attribute with Un override def withNullability(newNullability: Boolean): UnresolvedAttribute = this override def withQualifier(newQualifier: Option[String]): UnresolvedAttribute = this override def withName(newName: String): UnresolvedAttribute = UnresolvedAttribute.quoted(newName) + override def withMetadata(newMetadata: Metadata): Attribute = this override def toString: String = s"'$name" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 306a99d5a37bf..1274757136051 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -22,6 +22,7 @@ import java.util.{Objects, UUID} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types._ @@ -104,6 +105,7 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn def withNullability(newNullability: Boolean): Attribute def withQualifier(newQualifier: Option[String]): Attribute def withName(newName: String): Attribute + def withMetadata(newMetadata: Metadata): Attribute override def toAttribute: Attribute = this def newInstance(): Attribute @@ -292,11 +294,22 @@ case class AttributeReference( } } + override def withMetadata(newMetadata: Metadata): Attribute = { + AttributeReference(name, dataType, nullable, newMetadata)(exprId, qualifier, isGenerated) + } + override protected final def otherCopyArgs: Seq[AnyRef] = { exprId :: qualifier :: isGenerated :: Nil } - override def toString: 
String = s"$name#${exprId.id}$typeSuffix" + /** Used to signal the column used to calculate an eventTime watermark (e.g. a#1-T{delayMs}) */ + private def delaySuffix = if (metadata.contains(EventTimeWatermark.delayKey)) { + s"-T${metadata.getLong(EventTimeWatermark.delayKey)}ms" + } else { + "" + } + + override def toString: String = s"$name#${exprId.id}$typeSuffix$delaySuffix" // Since the expression id is not in the first constructor it is missing from the default // tree string. @@ -332,6 +345,8 @@ case class PrettyAttribute( override def withQualifier(newQualifier: Option[String]): Attribute = throw new UnsupportedOperationException override def withName(newName: String): Attribute = throw new UnsupportedOperationException + override def withMetadata(newMetadata: Metadata): Attribute = + throw new UnsupportedOperationException override def qualifier: Option[String] = throw new UnsupportedOperationException override def exprId: ExprId = throw new UnsupportedOperationException override def nullable: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala new file mode 100644 index 0000000000000..4224a7997c410 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.types.MetadataBuilder +import org.apache.spark.unsafe.types.CalendarInterval + +object EventTimeWatermark { + /** The [[org.apache.spark.sql.types.Metadata]] key used to hold the eventTime watermark delay. */ + val delayKey = "spark.watermarkDelayMs" +} + +/** + * Used to mark a user specified column as holding the event time for a row. + */ +case class EventTimeWatermark( + eventTime: Attribute, + delay: CalendarInterval, + child: LogicalPlan) extends LogicalPlan { + + // Update the metadata on the eventTime column to include the desired delay. 
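+  // Downstream operators and analysis rules locate the watermark column by checking
+  // for this metadata key (EventTimeWatermark.delayKey).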
+ override val output: Seq[Attribute] = child.output.map { a => + if (a semanticEquals eventTime) { + val updatedMetadata = new MetadataBuilder() + .withMetadata(a.metadata) + .putLong(EventTimeWatermark.delayKey, delay.milliseconds) + .build() + a.withMetadata(updatedMetadata) + } else { + a + } + } + + override val children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index eb2b20afc37cf..af30683cc01c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -50,6 +50,7 @@ import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel +import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.Utils private[sql] object Dataset { @@ -476,7 +477,7 @@ class Dataset[T] private[sql]( * `collect()`, will throw an [[AnalysisException]] when there is a streaming * source present. * - * @group basic + * @group streaming * @since 2.0.0 */ @Experimental @@ -496,8 +497,6 @@ class Dataset[T] private[sql]( /** * Returns a checkpointed version of this Dataset. * - * @param eager When true, materializes the underlying checkpointed RDD eagerly. - * * @group basic * @since 2.1.0 */ @@ -535,6 +534,41 @@ class Dataset[T] private[sql]( )(sparkSession)).as[T] } + /** + * :: Experimental :: + * Defines an event time watermark for this [[Dataset]]. A watermark tracks a point in time + * before which we assume no more late data is going to arrive. + * + * Spark will use this watermark for several purposes: + * - To know when a given time window aggregation can be finalized and thus can be emitted when + * using output modes that do not allow updates. + * - To minimize the amount of state that we need to keep for on-going aggregations. + * + * The current watermark is computed by looking at the `MAX(eventTime)` seen across + * all of the partitions in the query minus a user specified `delayThreshold`. Due to the cost + * of coordinating this value across partitions, the actual watermark used is only guaranteed + * to be at least `delayThreshold` behind the actual event time. In some cases we may still + * process records that arrive more than `delayThreshold` late. + * + * @param eventTime the name of the column that contains the event time of the row. + * @param delayThreshold the minimum delay to wait to data to arrive late, relative to the latest + * record that has been processed in the form of an interval + * (e.g. "1 minute" or "5 hours"). + * + * @group streaming + * @since 2.1.0 + */ + @Experimental + @InterfaceStability.Evolving + // We only accept an existing column name, not a derived column here as a watermark that is + // defined on a derived column cannot referenced elsewhere in the plan. + def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = withTypedPlan { + val parsedDelay = + Option(CalendarInterval.fromString("interval " + delayThreshold)) + .getOrElse(throw new AnalysisException(s"Unable to parse time delay '$delayThreshold'")) + EventTimeWatermark(UnresolvedAttribute(eventTime), parsedDelay, logicalPlan) + } + /** * Displays the Dataset in a tabular form. Strings more than 20 characters will be truncated, * and all cells will be aligned right. 
For example: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 190fdd84343ee..2308ae8a6c611 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -18,20 +18,23 @@ package org.apache.spark.sql.execution import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{execution, SaveMode, Strategy} +import org.apache.spark.sql.{SaveMode, Strategy} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{BroadcastHint, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{BroadcastHint, EventTimeWatermark, LogicalPlan} import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.execution import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.exchange.ShuffleExchange import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight} -import org.apache.spark.sql.execution.streaming.{MemoryPlan, StreamingExecutionRelation, StreamingRelation, StreamingRelationExec} +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamingQuery /** * Converts a logical plan into zero or more SparkPlans. This API is exposed for experimenting @@ -224,6 +227,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { */ object StatefulAggregationStrategy extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case EventTimeWatermark(columnName, delay, child) => + EventTimeWatermarkExec(columnName, delay, planLater(child)) :: Nil + case PhysicalAggregation( namedGroupingExpressions, aggregateExpressions, rewrittenResultExpressions, child) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala index 4fbb9d554c9bf..f7ea8970edf90 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala @@ -313,8 +313,13 @@ object AggUtils { } // Note: stateId and returnAllStates are filled in later with preparation rules // in IncrementalExecution. 
- val saved = StateStoreSaveExec( - groupingAttributes, stateId = None, returnAllStates = None, partialMerged2) + val saved = + StateStoreSaveExec( + groupingAttributes, + stateId = None, + outputMode = None, + eventTimeWatermark = None, + partialMerged2) val finalAndCompleteAggregate: SparkPlan = { val finalAggregateExpressions = functionsWithoutDistinct.map(_.copy(mode = Final)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index d82e54e57564c..52d8dc22a2d4d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -104,7 +104,7 @@ case class ExplainCommand( if (logicalPlan.isStreaming) { // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the // output mode does not matter since there is no `Sink`. - new IncrementalExecution(sparkSession, logicalPlan, OutputMode.Append(), "", 0) + new IncrementalExecution(sparkSession, logicalPlan, OutputMode.Append(), "", 0, 0) } else { sparkSession.sessionState.executePlan(logicalPlan) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala new file mode 100644 index 0000000000000..4c8cb069d23a0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import scala.math.max + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection} +import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.types.MetadataBuilder +import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.util.AccumulatorV2 + +/** Tracks the maximum positive long seen. 
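+ * Implemented as an AccumulatorV2 so that EventTimeWatermarkExec can track the maximum
+ * observed event time across all partitions.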
*/ +class MaxLong(protected var currentValue: Long = 0) + extends AccumulatorV2[Long, Long] { + + override def isZero: Boolean = value == 0 + override def value: Long = currentValue + override def copy(): AccumulatorV2[Long, Long] = new MaxLong(currentValue) + + override def reset(): Unit = { + currentValue = 0 + } + + override def add(v: Long): Unit = { + currentValue = max(v, value) + } + + override def merge(other: AccumulatorV2[Long, Long]): Unit = { + currentValue = max(value, other.value) + } +} + +/** + * Used to mark a column as the containing the event time for a given record. In addition to + * adding appropriate metadata to this column, this operator also tracks the maximum observed event + * time. Based on the maximum observed time and a user specified delay, we can calculate the + * `watermark` after which we assume we will no longer see late records for a particular time + * period. + */ +case class EventTimeWatermarkExec( + eventTime: Attribute, + delay: CalendarInterval, + child: SparkPlan) extends SparkPlan { + + // TODO: Use Spark SQL Metrics? + val maxEventTime = new MaxLong + sparkContext.register(maxEventTime) + + override protected def doExecute(): RDD[InternalRow] = { + child.execute().mapPartitions { iter => + val getEventTime = UnsafeProjection.create(eventTime :: Nil, child.output) + iter.map { row => + maxEventTime.add(getEventTime(row).getLong(0)) + row + } + } + } + + // Update the metadata on the eventTime column to include the desired delay. + override val output: Seq[Attribute] = child.output.map { a => + if (a semanticEquals eventTime) { + val updatedMetadata = new MetadataBuilder() + .withMetadata(a.metadata) + .putLong(EventTimeWatermark.delayKey, delay.milliseconds) + .build() + + a.withMetadata(updatedMetadata) + } else { + a + } + } + + override def children: Seq[SparkPlan] = child :: Nil +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala index 24f98b9211f12..f5c550dd6ac3a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala @@ -60,7 +60,8 @@ class ForeachSink[T : Encoder](writer: ForeachWriter[T]) extends Sink with Seria deserialized, data.queryExecution.asInstanceOf[IncrementalExecution].outputMode, data.queryExecution.asInstanceOf[IncrementalExecution].checkpointLocation, - data.queryExecution.asInstanceOf[IncrementalExecution].currentBatchId) + data.queryExecution.asInstanceOf[IncrementalExecution].currentBatchId, + data.queryExecution.asInstanceOf[IncrementalExecution].currentEventTimeWatermark) incrementalExecution.toRdd.mapPartitions { rows => rows.map(_.get(0, objectType)) }.asInstanceOf[RDD[T]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index 05294df2673dc..e9d072f8a98b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -32,11 +32,13 @@ class IncrementalExecution( logicalPlan: LogicalPlan, val outputMode: OutputMode, val checkpointLocation: String, - val currentBatchId: Long) + val currentBatchId: Long, + val currentEventTimeWatermark: Long) extends QueryExecution(sparkSession, 
logicalPlan) { // TODO: make this always part of planning. - val stateStrategy = sparkSession.sessionState.planner.StatefulAggregationStrategy +: + val stateStrategy = + sparkSession.sessionState.planner.StatefulAggregationStrategy +: sparkSession.sessionState.planner.StreamingRelationStrategy +: sparkSession.sessionState.experimentalMethods.extraStrategies @@ -57,17 +59,17 @@ class IncrementalExecution( val state = new Rule[SparkPlan] { override def apply(plan: SparkPlan): SparkPlan = plan transform { - case StateStoreSaveExec(keys, None, None, + case StateStoreSaveExec(keys, None, None, None, UnaryExecNode(agg, StateStoreRestoreExec(keys2, None, child))) => val stateId = OperatorStateId(checkpointLocation, operatorId, currentBatchId) - val returnAllStates = if (outputMode == InternalOutputModes.Complete) true else false operatorId += 1 StateStoreSaveExec( keys, Some(stateId), - Some(returnAllStates), + Some(outputMode), + Some(currentEventTimeWatermark), agg.withNewChildren( StateStoreRestoreExec( keys, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulAggregate.scala index ad8238f189c64..7af978a9c4aa2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulAggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulAggregate.scala @@ -21,12 +21,17 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.catalyst.expressions.codegen.{GeneratePredicate, GenerateUnsafeProjection} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution +import org.apache.spark.sql.InternalOutputModes._ +import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.execution.streaming.state._ import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.types.StructType + /** Used to identify the state store for a given operator. 
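 * An id combines the checkpoint location, the operator id, and the batch id.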
*/ case class OperatorStateId( @@ -92,8 +97,9 @@ case class StateStoreRestoreExec( */ case class StateStoreSaveExec( keyExpressions: Seq[Attribute], - stateId: Option[OperatorStateId], - returnAllStates: Option[Boolean], + stateId: Option[OperatorStateId] = None, + outputMode: Option[OutputMode] = None, + eventTimeWatermark: Option[Long] = None, child: SparkPlan) extends execution.UnaryExecNode with StatefulOperator { @@ -104,9 +110,9 @@ case class StateStoreSaveExec( override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver - assert(returnAllStates.nonEmpty, - "Incorrect planning in IncrementalExecution, returnAllStates have not been set") - val saveAndReturnFunc = if (returnAllStates.get) saveAndReturnAll _ else saveAndReturnUpdated _ + assert(outputMode.nonEmpty, + "Incorrect planning in IncrementalExecution, outputMode has not been set") + child.execute().mapPartitionsWithStateStore( getStateId.checkpointLocation, operatorId = getStateId.operatorId, @@ -114,75 +120,95 @@ case class StateStoreSaveExec( keyExpressions.toStructType, child.output.toStructType, sqlContext.sessionState, - Some(sqlContext.streams.stateStoreCoordinator) - )(saveAndReturnFunc) + Some(sqlContext.streams.stateStoreCoordinator)) { (store, iter) => + val getKey = GenerateUnsafeProjection.generate(keyExpressions, child.output) + val numOutputRows = longMetric("numOutputRows") + val numTotalStateRows = longMetric("numTotalStateRows") + val numUpdatedStateRows = longMetric("numUpdatedStateRows") + + outputMode match { + // Update and output all rows in the StateStore. + case Some(Complete) => + while (iter.hasNext) { + val row = iter.next().asInstanceOf[UnsafeRow] + val key = getKey(row) + store.put(key.copy(), row.copy()) + numUpdatedStateRows += 1 + } + store.commit() + numTotalStateRows += store.numKeys() + store.iterator().map { case (k, v) => + numOutputRows += 1 + v.asInstanceOf[InternalRow] + } + + // Update and output only rows being evicted from the StateStore + case Some(Append) => + while (iter.hasNext) { + val row = iter.next().asInstanceOf[UnsafeRow] + val key = getKey(row) + store.put(key.copy(), row.copy()) + numUpdatedStateRows += 1 + } + + val watermarkAttribute = + keyExpressions.find(_.metadata.contains(EventTimeWatermark.delayKey)).get + // If we are evicting based on a window, use the end of the window. Otherwise just + // use the attribute itself. + val evictionExpression = + if (watermarkAttribute.dataType.isInstanceOf[StructType]) { + LessThanOrEqual( + GetStructField(watermarkAttribute, 1), + Literal(eventTimeWatermark.get * 1000)) + } else { + LessThanOrEqual( + watermarkAttribute, + Literal(eventTimeWatermark.get * 1000)) + } + + logInfo(s"Filtering state store on: $evictionExpression") + val predicate = newPredicate(evictionExpression, keyExpressions) + store.remove(predicate.eval) + + store.commit() + + numTotalStateRows += store.numKeys() + store.updates().filter(_.isInstanceOf[ValueRemoved]).map { removed => + numOutputRows += 1 + removed.value.asInstanceOf[InternalRow] + } + + // Update and output modified rows from the StateStore. 
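+          // Rows are added to the store lazily as the returned iterator is consumed;
+          // the store commits once the input iterator is exhausted.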
+ case Some(Update) => + new Iterator[InternalRow] { + private[this] val baseIterator = iter + + override def hasNext: Boolean = { + if (!baseIterator.hasNext) { + store.commit() + numTotalStateRows += store.numKeys() + false + } else { + true + } + } + + override def next(): InternalRow = { + val row = baseIterator.next().asInstanceOf[UnsafeRow] + val key = getKey(row) + store.put(key.copy(), row.copy()) + numOutputRows += 1 + numUpdatedStateRows += 1 + row + } + } + + case _ => throw new UnsupportedOperationException(s"Invalid output mode: $outputMode") + } + } } override def output: Seq[Attribute] = child.output override def outputPartitioning: Partitioning = child.outputPartitioning - - /** - * Save all the rows to the state store, and return all the rows in the state store. - * Note that this returns an iterator that pipelines the saving to store with downstream - * processing. - */ - private def saveAndReturnUpdated( - store: StateStore, - iter: Iterator[InternalRow]): Iterator[InternalRow] = { - val numOutputRows = longMetric("numOutputRows") - val numTotalStateRows = longMetric("numTotalStateRows") - val numUpdatedStateRows = longMetric("numUpdatedStateRows") - - new Iterator[InternalRow] { - private[this] val baseIterator = iter - private[this] val getKey = GenerateUnsafeProjection.generate(keyExpressions, child.output) - - override def hasNext: Boolean = { - if (!baseIterator.hasNext) { - store.commit() - numTotalStateRows += store.numKeys() - false - } else { - true - } - } - - override def next(): InternalRow = { - val row = baseIterator.next().asInstanceOf[UnsafeRow] - val key = getKey(row) - store.put(key.copy(), row.copy()) - numOutputRows += 1 - numUpdatedStateRows += 1 - row - } - } - } - - /** - * Save all the rows to the state store, and return all the rows in the state store. - * Note that the saving to store is blocking; only after all the rows have been saved - * is the iterator on the update store data is generated. - */ - private def saveAndReturnAll( - store: StateStore, - iter: Iterator[InternalRow]): Iterator[InternalRow] = { - val getKey = GenerateUnsafeProjection.generate(keyExpressions, child.output) - val numOutputRows = longMetric("numOutputRows") - val numTotalStateRows = longMetric("numTotalStateRows") - val numUpdatedStateRows = longMetric("numUpdatedStateRows") - - while (iter.hasNext) { - val row = iter.next().asInstanceOf[UnsafeRow] - val key = getKey(row) - store.put(key.copy(), row.copy()) - numUpdatedStateRows += 1 - } - store.commit() - numTotalStateRows += store.numKeys() - store.iterator().map { case (k, v) => - numOutputRows += 1 - v.asInstanceOf[InternalRow] - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 57e89f85361e4..3ca6feac05cef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -92,6 +92,9 @@ class StreamExecution( /** The current batchId or -1 if execution has not yet been initialized. */ private var currentBatchId: Long = -1 + /** The current eventTime watermark, used to bound the lateness of data that will processed. */ + private var currentEventTimeWatermark: Long = 0 + /** All stream sources present in the query plan. 
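 * Collected by pattern matching on the StreamingExecutionRelation nodes in the plan.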
*/ private val sources = logicalPlan.collect { case s: StreamingExecutionRelation => s.source } @@ -427,7 +430,8 @@ class StreamExecution( triggerLogicalPlan, outputMode, checkpointFile("state"), - currentBatchId) + currentBatchId, + currentEventTimeWatermark) lastExecution.executedPlan // Force the lazy generation of execution plan } @@ -436,6 +440,25 @@ class StreamExecution( sink.addBatch(currentBatchId, nextBatch) reportNumRows(executedPlan, triggerLogicalPlan, newData) + // Update the eventTime watermark if we find one in the plan. + // TODO: Does this need to be an AttributeMap? + lastExecution.executedPlan.collect { + case e: EventTimeWatermarkExec => + logTrace(s"Maximum observed eventTime: ${e.maxEventTime.value}") + (e.maxEventTime.value / 1000) - e.delay.milliseconds() + }.headOption.foreach { newWatermark => + if (newWatermark > currentEventTimeWatermark) { + logInfo(s"Updating eventTime watermark to: $newWatermark ms") + currentEventTimeWatermark = newWatermark + } else { + logTrace(s"Event time didn't move: $newWatermark < $currentEventTimeWatermark") + } + + if (newWatermark != 0) { + streamMetrics.reportTriggerDetail(EVENT_TIME_WATERMARK, newWatermark) + } + } + awaitBatchLock.lock() try { // Wake up any threads that are waiting for the stream to progress. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala index e98d1883e4596..5645554a58f6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala @@ -221,6 +221,7 @@ object StreamMetrics extends Logging { val IS_TRIGGER_ACTIVE = "isTriggerActive" val IS_DATA_PRESENT_IN_TRIGGER = "isDataPresentInTrigger" val STATUS_MESSAGE = "statusMessage" + val EVENT_TIME_WATERMARK = "eventTimeWatermark" val START_TIMESTAMP = "timestamp.triggerStart" val GET_OFFSET_TIMESTAMP = "timestamp.afterGetOffset" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index f07feaad5dc71..493fdaaec5069 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -109,7 +109,7 @@ private[state] class HDFSBackedStateStoreProvider( case Some(ValueAdded(_, _)) => // Value did not exist in previous version and was added already, keep it marked as added allUpdates.put(key, ValueAdded(key, value)) - case Some(ValueUpdated(_, _)) | Some(KeyRemoved(_)) => + case Some(ValueUpdated(_, _)) | Some(ValueRemoved(_, _)) => // Value existed in previous version and updated/removed, mark it as updated allUpdates.put(key, ValueUpdated(key, value)) case None => @@ -124,24 +124,25 @@ private[state] class HDFSBackedStateStoreProvider( /** Remove keys that match the following condition */ override def remove(condition: UnsafeRow => Boolean): Unit = { verify(state == UPDATING, "Cannot remove after already committed or aborted") - - val keyIter = mapToUpdate.keySet().iterator() - while (keyIter.hasNext) { - val key = keyIter.next - if (condition(key)) { - keyIter.remove() + val entryIter = mapToUpdate.entrySet().iterator() + while (entryIter.hasNext) { + val entry = 
entryIter.next + if (condition(entry.getKey)) { + val value = entry.getValue + val key = entry.getKey + entryIter.remove() Option(allUpdates.get(key)) match { case Some(ValueUpdated(_, _)) | None => // Value existed in previous version and maybe was updated, mark removed - allUpdates.put(key, KeyRemoved(key)) + allUpdates.put(key, ValueRemoved(key, value)) case Some(ValueAdded(_, _)) => // Value did not exist in previous version and was added, should not appear in updates allUpdates.remove(key) - case Some(KeyRemoved(_)) => + case Some(ValueRemoved(_, _)) => // Remove already in update map, no need to change } - writeToDeltaFile(tempDeltaFileStream, KeyRemoved(key)) + writeToDeltaFile(tempDeltaFileStream, ValueRemoved(key, value)) } } } @@ -334,7 +335,7 @@ private[state] class HDFSBackedStateStoreProvider( writeUpdate(key, value) case ValueUpdated(key, value) => writeUpdate(key, value) - case KeyRemoved(key) => + case ValueRemoved(key, value) => writeRemove(key) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 7132e284c28f4..9bc6c0e2b9334 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -99,13 +99,16 @@ trait StateStoreProvider { /** Trait representing updates made to a [[StateStore]]. */ -sealed trait StoreUpdate +sealed trait StoreUpdate { + def key: UnsafeRow + def value: UnsafeRow +} case class ValueAdded(key: UnsafeRow, value: UnsafeRow) extends StoreUpdate case class ValueUpdated(key: UnsafeRow, value: UnsafeRow) extends StoreUpdate -case class KeyRemoved(key: UnsafeRow) extends StoreUpdate +case class ValueRemoved(key: UnsafeRow, value: UnsafeRow) extends StoreUpdate /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 533cd0cd2a2ea..05fc7345a7daf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -668,11 +668,11 @@ private[state] object StateStoreSuite { } def updatesToSet(iterator: Iterator[StoreUpdate]): Set[TestUpdate] = { - iterator.map { _ match { + iterator.map { case ValueAdded(key, value) => Added(rowToString(key), rowToInt(value)) case ValueUpdated(key, value) => Updated(rowToString(key), rowToInt(value)) - case KeyRemoved(key) => Removed(rowToString(key)) - }}.toSet + case ValueRemoved(key, _) => Removed(rowToString(key)) + }.toSet } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/WatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/WatermarkSuite.scala new file mode 100644 index 0000000000000..3617ec0f564c1 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/WatermarkSuite.scala @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.functions.{count, window} + +class WatermarkSuite extends StreamTest with BeforeAndAfter with Logging { + + import testImplicits._ + + after { + sqlContext.streams.active.foreach(_.stop()) + } + + test("error on bad column") { + val inputData = MemoryStream[Int].toDF() + val e = intercept[AnalysisException] { + inputData.withWatermark("badColumn", "1 minute") + } + assert(e.getMessage contains "badColumn") + } + + test("error on wrong type") { + val inputData = MemoryStream[Int].toDF() + val e = intercept[AnalysisException] { + inputData.withWatermark("value", "1 minute") + } + assert(e.getMessage contains "value") + assert(e.getMessage contains "int") + } + + + test("watermark metric") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) + + testStream(windowedAggregation)( + AddData(inputData, 15), + AssertOnLastQueryStatus { status => + status.triggerDetails.get(StreamMetrics.EVENT_TIME_WATERMARK) === "5000" + }, + AddData(inputData, 15), + AssertOnLastQueryStatus { status => + status.triggerDetails.get(StreamMetrics.EVENT_TIME_WATERMARK) === "5000" + }, + AddData(inputData, 25), + AssertOnLastQueryStatus { status => + status.triggerDetails.get(StreamMetrics.EVENT_TIME_WATERMARK) === "15000" + } + ) + } + + test("append-mode watermark aggregation") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) + + testStream(windowedAggregation)( + AddData(inputData, 10, 11, 12, 13, 14, 15), + CheckAnswer(), + AddData(inputData, 25), // Advance watermark to 15 seconds + CheckAnswer(), + AddData(inputData, 25), // Evict items less than previous watermark. + CheckAnswer((10, 5)) + ) + } + + ignore("recovery") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) + + testStream(windowedAggregation)( + AddData(inputData, 10, 11, 12, 13, 14, 15), + CheckAnswer(), + AddData(inputData, 25), // Advance watermark to 15 seconds + StopStream, + StartStream(), + CheckAnswer(), + AddData(inputData, 25), // Evict items less than previous watermark. 
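+      // Restart once more; the evicted result should still be produced from the recovered state.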
+ StopStream, + StartStream(), + CheckAnswer((10, 5)) + ) + } + + test("dropping old data") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) + + testStream(windowedAggregation)( + AddData(inputData, 10, 11, 12), + CheckAnswer(), + AddData(inputData, 25), // Advance watermark to 15 seconds + CheckAnswer(), + AddData(inputData, 25), // Evict items less than previous watermark. + CheckAnswer((10, 3)), + AddData(inputData, 10), // 10 is later than 15 second watermark + CheckAnswer((10, 3)), + AddData(inputData, 25), + CheckAnswer((10, 3)) // Should not emit an incorrect partial result. + ) + } + + test("complete mode") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) + + // No eviction when asked to compute complete results. + testStream(windowedAggregation, OutputMode.Complete)( + AddData(inputData, 10, 11, 12), + CheckAnswer((10, 3)), + AddData(inputData, 25), + CheckAnswer((10, 3), (25, 1)), + AddData(inputData, 25), + CheckAnswer((10, 3), (25, 2)), + AddData(inputData, 10), + CheckAnswer((10, 4), (25, 2)), + AddData(inputData, 25), + CheckAnswer((10, 4), (25, 3)) + ) + } + + test("group by on raw timestamp") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy($"eventTime") + .agg(count("*") as 'count) + .select($"eventTime".cast("long").as[Long], $"count".as[Long]) + + testStream(windowedAggregation)( + AddData(inputData, 10), + CheckAnswer(), + AddData(inputData, 25), // Advance watermark to 15 seconds + CheckAnswer(), + AddData(inputData, 25), // Evict items less than previous watermark. + CheckAnswer((10, 1)) + ) + } +} From 649c15fae423a415cb6165aa0ef6d97ab4949afb Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Mon, 14 Nov 2016 21:15:39 -0800 Subject: [PATCH 0110/1204] [SPARK-18428][DOC] Update docs for GraphX ## What changes were proposed in this pull request? 1, Add link of `VertexRDD` and `EdgeRDD` 2, Notify in `Vertex and Edge RDDs` that not all methods are listed 3, `VertexID` -> `VertexId` ## How was this patch tested? No tests, only docs is modified Author: Zheng RuiFeng Closes #15875 from zhengruifeng/update_graphop_doc. 
(cherry picked from commit c31def1ddcbed340bfc071d54fb3dc7945cb525a) Signed-off-by: Reynold Xin --- docs/graphx-programming-guide.md | 68 ++++++++++++++++---------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md index 58671e6f146d8..1097cf1211c1f 100644 --- a/docs/graphx-programming-guide.md +++ b/docs/graphx-programming-guide.md @@ -11,6 +11,7 @@ description: GraphX graph processing library guide for Spark SPARK_VERSION_SHORT [EdgeRDD]: api/scala/index.html#org.apache.spark.graphx.EdgeRDD +[VertexRDD]: api/scala/index.html#org.apache.spark.graphx.VertexRDD [Edge]: api/scala/index.html#org.apache.spark.graphx.Edge [EdgeTriplet]: api/scala/index.html#org.apache.spark.graphx.EdgeTriplet [Graph]: api/scala/index.html#org.apache.spark.graphx.Graph @@ -89,7 +90,7 @@ with user defined objects attached to each vertex and edge. A directed multigra graph with potentially multiple parallel edges sharing the same source and destination vertex. The ability to support parallel edges simplifies modeling scenarios where there can be multiple relationships (e.g., co-worker and friend) between the same vertices. Each vertex is keyed by a -*unique* 64-bit long identifier (`VertexID`). GraphX does not impose any ordering constraints on +*unique* 64-bit long identifier (`VertexId`). GraphX does not impose any ordering constraints on the vertex identifiers. Similarly, edges have corresponding source and destination vertex identifiers. @@ -130,12 +131,12 @@ class Graph[VD, ED] { } {% endhighlight %} -The classes `VertexRDD[VD]` and `EdgeRDD[ED]` extend and are optimized versions of `RDD[(VertexID, +The classes `VertexRDD[VD]` and `EdgeRDD[ED]` extend and are optimized versions of `RDD[(VertexId, VD)]` and `RDD[Edge[ED]]` respectively. Both `VertexRDD[VD]` and `EdgeRDD[ED]` provide additional functionality built around graph computation and leverage internal optimizations. We discuss the -`VertexRDD` and `EdgeRDD` API in greater detail in the section on [vertex and edge +`VertexRDD`[VertexRDD] and `EdgeRDD`[EdgeRDD] API in greater detail in the section on [vertex and edge RDDs](#vertex_and_edge_rdds) but for now they can be thought of as simply RDDs of the form: -`RDD[(VertexID, VD)]` and `RDD[Edge[ED]]`. +`RDD[(VertexId, VD)]` and `RDD[Edge[ED]]`. ### Example Property Graph @@ -197,7 +198,7 @@ graph.edges.filter(e => e.srcId > e.dstId).count {% endhighlight %} > Note that `graph.vertices` returns an `VertexRDD[(String, String)]` which extends -> `RDD[(VertexID, (String, String))]` and so we use the scala `case` expression to deconstruct the +> `RDD[(VertexId, (String, String))]` and so we use the scala `case` expression to deconstruct the > tuple. On the other hand, `graph.edges` returns an `EdgeRDD` containing `Edge[String]` objects. 
> We could have also used the case class type constructor as in the following: > {% highlight scala %} @@ -287,7 +288,7 @@ class Graph[VD, ED] { // Change the partitioning heuristic ============================================================ def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED] // Transform vertex and edge attributes ========================================================== - def mapVertices[VD2](map: (VertexID, VD) => VD2): Graph[VD2, ED] + def mapVertices[VD2](map: (VertexId, VD) => VD2): Graph[VD2, ED] def mapEdges[ED2](map: Edge[ED] => ED2): Graph[VD, ED2] def mapEdges[ED2](map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2] def mapTriplets[ED2](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2] @@ -297,18 +298,18 @@ class Graph[VD, ED] { def reverse: Graph[VD, ED] def subgraph( epred: EdgeTriplet[VD,ED] => Boolean = (x => true), - vpred: (VertexID, VD) => Boolean = ((v, d) => true)) + vpred: (VertexId, VD) => Boolean = ((v, d) => true)) : Graph[VD, ED] def mask[VD2, ED2](other: Graph[VD2, ED2]): Graph[VD, ED] def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED] // Join RDDs with the graph ====================================================================== - def joinVertices[U](table: RDD[(VertexID, U)])(mapFunc: (VertexID, VD, U) => VD): Graph[VD, ED] - def outerJoinVertices[U, VD2](other: RDD[(VertexID, U)]) - (mapFunc: (VertexID, VD, Option[U]) => VD2) + def joinVertices[U](table: RDD[(VertexId, U)])(mapFunc: (VertexId, VD, U) => VD): Graph[VD, ED] + def outerJoinVertices[U, VD2](other: RDD[(VertexId, U)]) + (mapFunc: (VertexId, VD, Option[U]) => VD2) : Graph[VD2, ED] // Aggregate information about adjacent triplets ================================================= - def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexID]] - def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexID, VD)]] + def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]] + def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]] def aggregateMessages[Msg: ClassTag]( sendMsg: EdgeContext[VD, ED, Msg] => Unit, mergeMsg: (Msg, Msg) => Msg, @@ -316,15 +317,15 @@ class Graph[VD, ED] { : VertexRDD[A] // Iterative graph-parallel computation ========================================================== def pregel[A](initialMsg: A, maxIterations: Int, activeDirection: EdgeDirection)( - vprog: (VertexID, VD, A) => VD, - sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID,A)], + vprog: (VertexId, VD, A) => VD, + sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId,A)], mergeMsg: (A, A) => A) : Graph[VD, ED] // Basic graph algorithms ======================================================================== def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] - def connectedComponents(): Graph[VertexID, ED] + def connectedComponents(): Graph[VertexId, ED] def triangleCount(): Graph[Int, ED] - def stronglyConnectedComponents(numIter: Int): Graph[VertexID, ED] + def stronglyConnectedComponents(numIter: Int): Graph[VertexId, ED] } {% endhighlight %} @@ -481,7 +482,7 @@ original value. > is therefore recommended that the input RDD be made unique using the following which will > also *pre-index* the resulting values to substantially accelerate the subsequent join. 
> {% highlight scala %} -val nonUniqueCosts: RDD[(VertexID, Double)] +val nonUniqueCosts: RDD[(VertexId, Double)] val uniqueCosts: VertexRDD[Double] = graph.vertices.aggregateUsingIndex(nonUnique, (a,b) => a + b) val joinedGraph = graph.joinVertices(uniqueCosts)( @@ -511,7 +512,7 @@ val degreeGraph = graph.outerJoinVertices(outDegrees) { (id, oldAttr, outDegOpt) > provide type annotation for the user defined function: > {% highlight scala %} val joinedGraph = graph.joinVertices(uniqueCosts, - (id: VertexID, oldCost: Double, extraCost: Double) => oldCost + extraCost) + (id: VertexId, oldCost: Double, extraCost: Double) => oldCost + extraCost) {% endhighlight %} > @@ -558,7 +559,7 @@ The user defined `mergeMsg` function takes two messages destined to the same ver yields a single message. Think of `mergeMsg` as the reduce function in map-reduce. The [`aggregateMessages`][Graph.aggregateMessages] operator returns a `VertexRDD[Msg]` containing the aggregate message (of type `Msg`) destined to each vertex. Vertices that did not -receive a message are not included in the returned `VertexRDD`. +receive a message are not included in the returned `VertexRDD`[VertexRDD]. + +More details on parameters can be found in the [Python API documentation](api/python/pyspark.ml.html#pyspark.ml.regression.LinearRegression). + {% include_example python/ml/linear_regression_with_elastic_net.py %}
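A minimal sketch of the `aggregateMessages` pattern described in the GraphX guide text
above (not part of the patches themselves; the `Graph[String, String]` attribute types and
the `inNeighborCounts` name are assumed purely for illustration):

{% highlight scala %}
// Count each vertex's in-neighbors with aggregateMessages.
import org.apache.spark.graphx._

def inNeighborCounts(graph: Graph[String, String]): VertexRDD[Int] =
  graph.aggregateMessages[Int](
    ctx => ctx.sendToDst(1),  // sendMsg: every edge sends the value 1 to its destination
    (a, b) => a + b           // mergeMsg: messages to the same vertex are summed, like a reduce
  )
{% endhighlight %}

Vertices that receive no message (those with no incoming edges) are absent from the
returned `VertexRDD[Int]`, matching the behavior noted above.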
@@ -519,18 +546,21 @@ function and extracting model summary statistics.
+ Refer to the [Scala API docs](api/scala/index.html#org.apache.spark.ml.regression.GeneralizedLinearRegression) for more details. {% include_example scala/org/apache/spark/examples/ml/GeneralizedLinearRegressionExample.scala %}
+ Refer to the [Java API docs](api/java/org/apache/spark/ml/regression/GeneralizedLinearRegression.html) for more details. {% include_example java/org/apache/spark/examples/ml/JavaGeneralizedLinearRegressionExample.java %}
+ Refer to the [Python API docs](api/python/pyspark.ml.html#pyspark.ml.regression.GeneralizedLinearRegression) for more details. {% include_example python/ml/generalized_linear_regression_example.py %} @@ -705,14 +735,23 @@ The implementation matches the result from R's survival function
+ +Refer to the [Scala API docs](api/scala/index.html#org.apache.spark.ml.regression.AFTSurvivalRegression) for more details. + {% include_example scala/org/apache/spark/examples/ml/AFTSurvivalRegressionExample.scala %}
+ +Refer to the [Java API docs](api/java/org/apache/spark/ml/regression/AFTSurvivalRegression.html) for more details. + {% include_example java/org/apache/spark/examples/ml/JavaAFTSurvivalRegressionExample.java %}
+ +Refer to the [Python API docs](api/python/pyspark.ml.html#pyspark.ml.regression.AFTSurvivalRegression) for more details. + {% include_example python/ml/aft_survival_regression.py %}
diff --git a/docs/ml-pipeline.md b/docs/ml-pipeline.md index adb057ba7e250..b4d6be94f5eb0 100644 --- a/docs/ml-pipeline.md +++ b/docs/ml-pipeline.md @@ -207,14 +207,29 @@ This example covers the concepts of `Estimator`, `Transformer`, and `Param`.
+ +Refer to the [`Estimator` Scala docs](api/scala/index.html#org.apache.spark.ml.Estimator), +the [`Transformer` Scala docs](api/scala/index.html#org.apache.spark.ml.Transformer) and +the [`Params` Scala docs](api/scala/index.html#org.apache.spark.ml.param.Params) for details on the API. + {% include_example scala/org/apache/spark/examples/ml/EstimatorTransformerParamExample.scala %}
+ +Refer to the [`Estimator` Java docs](api/java/org/apache/spark/ml/Estimator.html), +the [`Transformer` Java docs](api/java/org/apache/spark/ml/Transformer.html) and +the [`Params` Java docs](api/java/org/apache/spark/ml/param/Params.html) for details on the API. + {% include_example java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java %}
+ +Refer to the [`Estimator` Python docs](api/python/pyspark.ml.html#pyspark.ml.Estimator), +the [`Transformer` Python docs](api/python/pyspark.ml.html#pyspark.ml.Transformer) and +the [`Params` Python docs](api/python/pyspark.ml.html#pyspark.ml.param.Params) for more details on the API. + {% include_example python/ml/estimator_transformer_param_example.py %}
@@ -227,14 +242,24 @@ This example follows the simple text document `Pipeline` illustrated in the figu
+ +Refer to the [`Pipeline` Scala docs](api/scala/index.html#org.apache.spark.ml.Pipeline) for details on the API. + {% include_example scala/org/apache/spark/examples/ml/PipelineExample.scala %}
+ + +Refer to the [`Pipeline` Java docs](api/java/org/apache/spark/ml/Pipeline.html) for details on the API. + {% include_example java/org/apache/spark/examples/ml/JavaPipelineExample.java %}
+ +Refer to the [`Pipeline` Python docs](api/python/pyspark.ml.html#pyspark.ml.Pipeline) for more details on the API. + {% include_example python/ml/pipeline_example.py %}
diff --git a/docs/ml-tuning.md b/docs/ml-tuning.md index 2ca90c7092fd3..15748720b7ae2 100644 --- a/docs/ml-tuning.md +++ b/docs/ml-tuning.md @@ -75,15 +75,23 @@ However, it is also a well-established method for choosing parameters which is m
+ +Refer to the [`CrossValidator` Scala docs](api/scala/index.html#org.apache.spark.ml.tuning.CrossValidator) for details on the API. + {% include_example scala/org/apache/spark/examples/ml/ModelSelectionViaCrossValidationExample.scala %}
+ +Refer to the [`CrossValidator` Java docs](api/java/org/apache/spark/ml/tuning/CrossValidator.html) for details on the API. + {% include_example java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java %}
+Refer to the [`CrossValidator` Python docs](api/python/pyspark.ml.html#pyspark.ml.tuning.CrossValidator) for more details on the API. + {% include_example python/ml/cross_validator.py %}
@@ -107,14 +115,23 @@ Like `CrossValidator`, `TrainValidationSplit` finally fits the `Estimator` using
+ +Refer to the [`TrainValidationSplit` Scala docs](api/scala/index.html#org.apache.spark.ml.tuning.TrainValidationSplit) for details on the API. + {% include_example scala/org/apache/spark/examples/ml/ModelSelectionViaTrainValidationSplitExample.scala %}
+ +Refer to the [`TrainValidationSplit` Java docs](api/java/org/apache/spark/ml/tuning/TrainValidationSplit.html) for details on the API. + {% include_example java/org/apache/spark/examples/ml/JavaModelSelectionViaTrainValidationSplitExample.java %}
+ +Refer to the [`TrainValidationSplit` Python docs](api/python/pyspark.ml.html#pyspark.ml.tuning.TrainValidationSplit) for more details on the API. + {% include_example python/ml/train_validation_split.py %}
From b0ae8712358fc8c07aa5efe4d0bd337e7e452078 Mon Sep 17 00:00:00 2001 From: Xianyang Liu Date: Wed, 16 Nov 2016 11:59:00 +0000 Subject: [PATCH 0129/1204] [SPARK-18420][BUILD] Fix the errors caused by lint check in Java Small fix, fix the errors caused by lint check in Java - Clear unused objects and `UnusedImports`. - Add comments around the method `finalize` of `NioBufferedFileInputStream`to turn off checkstyle. - Cut the line which is longer than 100 characters into two lines. Travis CI. ``` $ build/mvn -T 4 -q -DskipTests -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install $ dev/lint-java ``` Before: ``` Checkstyle checks failed at following occurrences: [ERROR] src/main/java/org/apache/spark/network/util/TransportConf.java:[21,8] (imports) UnusedImports: Unused import - org.apache.commons.crypto.cipher.CryptoCipherFactory. [ERROR] src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java:[516,5] (modifier) RedundantModifier: Redundant 'public' modifier. [ERROR] src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java:[133] (coding) NoFinalizer: Avoid using finalizer method. [ERROR] src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java:[71] (sizes) LineLength: Line is longer than 100 characters (found 113). [ERROR] src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java:[112] (sizes) LineLength: Line is longer than 100 characters (found 110). [ERROR] src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java:[31,17] (modifier) ModifierOrder: 'static' modifier out of order with the JLS suggestions. [ERROR]src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java:[64] (sizes) LineLength: Line is longer than 100 characters (found 103). [ERROR] src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java:[22,8] (imports) UnusedImports: Unused import - org.apache.spark.ml.linalg.Vectors. [ERROR] src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java:[51] (regexp) RegexpSingleline: No trailing whitespace allowed. ``` After: ``` $ build/mvn -T 4 -q -DskipTests -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install $ dev/lint-java Using `mvn` from path: /home/travis/build/ConeyLiu/spark/build/apache-maven-3.3.9/bin/mvn Checkstyle checks passed. ``` Author: Xianyang Liu Closes #15865 from ConeyLiu/master. 
(cherry picked from commit 7569cf6cb85bda7d0e76d3e75e286d4796e77e08) Signed-off-by: Sean Owen --- .../spark/io/NioBufferedFileInputStream.java | 2 ++ dev/checkstyle.xml | 15 +++++++++++++++ .../spark/examples/ml/JavaInteractionExample.java | 3 +-- ...vaLogisticRegressionWithElasticNetExample.java | 4 ++-- .../sql/catalyst/expressions/UnsafeArrayData.java | 3 ++- .../sql/catalyst/expressions/UnsafeMapData.java | 3 ++- .../sql/catalyst/expressions/HiveHasherSuite.java | 1 - 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java index f6d1288cb263d..ea5f1a9abf69b 100644 --- a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java +++ b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java @@ -130,8 +130,10 @@ public synchronized void close() throws IOException { StorageUtils.dispose(byteBuffer); } + //checkstyle.off: NoFinalizer @Override protected void finalize() throws IOException { close(); } + //checkstyle.on: NoFinalizer } diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 3de6aa91dcd51..92c5251c85037 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -52,6 +52,20 @@ + + + + + + + @@ -168,5 +182,6 @@ + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java index 4213c05703cc6..3684a87e22e7b 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaInteractionExample.java @@ -19,7 +19,6 @@ import org.apache.spark.ml.feature.Interaction; import org.apache.spark.ml.feature.VectorAssembler; -import org.apache.spark.ml.linalg.Vectors; import org.apache.spark.sql.*; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.Metadata; @@ -48,7 +47,7 @@ public static void main(String[] args) { RowFactory.create(5, 9, 2, 7, 10, 7, 3), RowFactory.create(6, 1, 1, 4, 2, 8, 4) ); - + StructType schema = new StructType(new StructField[]{ new StructField("id1", DataTypes.IntegerType, false, Metadata.empty()), new StructField("id2", DataTypes.IntegerType, false, Metadata.empty()), diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java index b8fb5972ea418..4cdec21d23023 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaLogisticRegressionWithElasticNetExample.java @@ -60,8 +60,8 @@ public static void main(String[] args) { LogisticRegressionModel mlrModel = mlr.fit(training); // Print the coefficients and intercepts for logistic regression with multinomial family - System.out.println("Multinomial coefficients: " - + lrModel.coefficientMatrix() + "\nMultinomial intercepts: " + mlrModel.interceptVector()); + System.out.println("Multinomial coefficients: " + lrModel.coefficientMatrix() + + "\nMultinomial intercepts: " + mlrModel.interceptVector()); // $example off$ spark.stop(); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 
86523c1474015..e8c33871f97bc 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -109,7 +109,8 @@ public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) { // Read the number of elements from the first 8 bytes. final long numElements = Platform.getLong(baseObject, baseOffset); assert numElements >= 0 : "numElements (" + numElements + ") should >= 0"; - assert numElements <= Integer.MAX_VALUE : "numElements (" + numElements + ") should <= Integer.MAX_VALUE"; + assert numElements <= Integer.MAX_VALUE : + "numElements (" + numElements + ") should <= Integer.MAX_VALUE"; this.numElements = (int)numElements; this.baseObject = baseObject; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java index 35029f5a50e3e..f17441dfccb6d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeMapData.java @@ -68,7 +68,8 @@ public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) { // Read the numBytes of key array from the first 8 bytes. final long keyArraySize = Platform.getLong(baseObject, baseOffset); assert keyArraySize >= 0 : "keyArraySize (" + keyArraySize + ") should >= 0"; - assert keyArraySize <= Integer.MAX_VALUE : "keyArraySize (" + keyArraySize + ") should <= Integer.MAX_VALUE"; + assert keyArraySize <= Integer.MAX_VALUE : + "keyArraySize (" + keyArraySize + ") should <= Integer.MAX_VALUE"; final int valueArraySize = sizeInBytes - (int)keyArraySize - 8; assert valueArraySize >= 0 : "valueArraySize (" + valueArraySize + ") should >= 0"; diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java index 67a5eb0c7fe8f..b67c6f3e6e85e 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/HiveHasherSuite.java @@ -28,7 +28,6 @@ import java.util.Set; public class HiveHasherSuite { - private final static HiveHasher hasher = new HiveHasher(); @Test public void testKnownIntegerInputs() { From c0dbe08d604dea543eb17ccb802a8a20d6c21a69 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 16 Nov 2016 08:25:15 -0800 Subject: [PATCH 0130/1204] [SPARK-18415][SQL] Weird Plan Output when CTE used in RunnableCommand ### What changes were proposed in this pull request? Currently, when CTE is used in RunnableCommand, the Analyzer does not replace the logical node `With`. The child plan of RunnableCommand is not resolved. Thus, the output of the `With` plan node looks very confusing. 
For example, ``` sql( """ |CREATE VIEW cte_view AS |WITH w AS (SELECT 1 AS n), cte1 (select 2), cte2 as (select 3) |SELECT n FROM w """.stripMargin).explain() ``` The output is like ``` ExecutedCommand +- CreateViewCommand `cte_view`, WITH w AS (SELECT 1 AS n), cte1 (select 2), cte2 as (select 3) SELECT n FROM w, false, false, PersistedView +- 'With [(w,SubqueryAlias w +- Project [1 AS n#16] +- OneRowRelation$ ), (cte1,'SubqueryAlias cte1 +- 'Project [unresolvedalias(2, None)] +- OneRowRelation$ ), (cte2,'SubqueryAlias cte2 +- 'Project [unresolvedalias(3, None)] +- OneRowRelation$ )] +- 'Project ['n] +- 'UnresolvedRelation `w` ``` After the fix, the output is as shown below. ``` ExecutedCommand +- CreateViewCommand `cte_view`, WITH w AS (SELECT 1 AS n), cte1 (select 2), cte2 as (select 3) SELECT n FROM w, false, false, PersistedView +- CTE [w, cte1, cte2] : :- SubqueryAlias w : : +- Project [1 AS n#16] : : +- OneRowRelation$ : :- 'SubqueryAlias cte1 : : +- 'Project [unresolvedalias(2, None)] : : +- OneRowRelation$ : +- 'SubqueryAlias cte2 : +- 'Project [unresolvedalias(3, None)] : +- OneRowRelation$ +- 'Project ['n] +- 'UnresolvedRelation `w` ``` BTW, this PR also fixes the output of the view type. ### How was this patch tested? Manual Author: gatorsmile Closes #15854 from gatorsmile/cteName. (cherry picked from commit 608ecc512b759514c75a1b475582f237ed569f10) Signed-off-by: Herman van Hovell --- .../catalyst/plans/logical/basicLogicalOperators.scala | 8 ++++++++ .../org/apache/spark/sql/execution/command/views.scala | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 574caf039d3d2..dd6c8fd1dcf3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * When planning take() or collect() operations, this special node that is inserted at the top of @@ -405,6 +406,13 @@ case class InsertIntoTable( */ case class With(child: LogicalPlan, cteRelations: Seq[(String, SubqueryAlias)]) extends UnaryNode { override def output: Seq[Attribute] = child.output + + override def simpleString: String = { + val cteAliases = Utils.truncatedString(cteRelations.map(_._1), "[", ", ", "]") + s"CTE $cteAliases" + } + + override def innerChildren: Seq[QueryPlan[_]] = cteRelations.map(_._2) } case class WithWindowDefinition( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 30472ec45ce44..154141bf83c7d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -33,7 +33,9 @@ import org.apache.spark.sql.types.MetadataBuilder * ViewType is used to specify the expected view type when we want to create or replace a view in * [[CreateViewCommand]]. 
*/ -sealed trait ViewType +sealed trait ViewType { + override def toString: String = getClass.getSimpleName.stripSuffix("$") +} /** * LocalTempView means session-scoped local temporary views. Its lifetime is the lifetime of the From b86e962c90c4322cd98b5bf3b19e251da2d32442 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 16 Nov 2016 10:00:59 -0800 Subject: [PATCH 0131/1204] [SPARK-18459][SPARK-18460][STRUCTUREDSTREAMING] Rename triggerId to batchId and add triggerDetails to json in StreamingQueryStatus ## What changes were proposed in this pull request? SPARK-18459: triggerId seems like a number that should be increasing with each trigger, whether or not there is data in it. However, actually, triggerId increases only where there is a batch of data in a trigger. So its better to rename it to batchId. SPARK-18460: triggerDetails was missing from json representation. Fixed it. ## How was this patch tested? Updated existing unit tests. Author: Tathagata Das Closes #15895 from tdas/SPARK-18459. (cherry picked from commit 0048ce7ce64b02cbb6a1c4a2963a0b1b9541047e) Signed-off-by: Shixiong Zhu --- python/pyspark/sql/streaming.py | 6 ++--- .../execution/streaming/StreamMetrics.scala | 8 +++---- .../sql/streaming/StreamingQueryStatus.scala | 4 ++-- .../streaming/StreamMetricsSuite.scala | 8 +++---- .../StreamingQueryListenerSuite.scala | 4 ++-- .../streaming/StreamingQueryStatusSuite.scala | 22 +++++++++++++++++-- 6 files changed, 35 insertions(+), 17 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index f326f16232690..0e4589be976ea 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -212,12 +212,12 @@ def __str__(self): Processing rate 23.5 rows/sec Latency: 345.0 ms Trigger details: + batchId: 5 isDataPresentInTrigger: true isTriggerActive: true latency.getBatch.total: 20 latency.getOffset.total: 10 numRows.input.total: 100 - triggerId: 5 Source statuses [1 source]: Source 1 - MySource1 Available offset: 0 @@ -341,8 +341,8 @@ def triggerDetails(self): If no trigger is currently active, then it will have details of the last completed trigger. 
>>> sqs.triggerDetails - {u'triggerId': u'5', u'latency.getBatch.total': u'20', u'numRows.input.total': u'100', - u'isTriggerActive': u'true', u'latency.getOffset.total': u'10', + {u'latency.getBatch.total': u'20', u'numRows.input.total': u'100', + u'isTriggerActive': u'true', u'batchId': u'5', u'latency.getOffset.total': u'10', u'isDataPresentInTrigger': u'true'} """ return self._jsqs.triggerDetails() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala index 5645554a58f6e..942e6ed8944be 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetrics.scala @@ -78,13 +78,13 @@ class StreamMetrics(sources: Set[Source], triggerClock: Clock, codahaleSourceNam // =========== Setter methods =========== - def reportTriggerStarted(triggerId: Long): Unit = synchronized { + def reportTriggerStarted(batchId: Long): Unit = synchronized { numInputRows.clear() triggerDetails.clear() sourceTriggerDetails.values.foreach(_.clear()) - reportTriggerDetail(TRIGGER_ID, triggerId) - sources.foreach(s => reportSourceTriggerDetail(s, TRIGGER_ID, triggerId)) + reportTriggerDetail(BATCH_ID, batchId) + sources.foreach(s => reportSourceTriggerDetail(s, BATCH_ID, batchId)) reportTriggerDetail(IS_TRIGGER_ACTIVE, true) currentTriggerStartTimestamp = triggerClock.getTimeMillis() reportTriggerDetail(START_TIMESTAMP, currentTriggerStartTimestamp) @@ -217,7 +217,7 @@ object StreamMetrics extends Logging { } - val TRIGGER_ID = "triggerId" + val BATCH_ID = "batchId" val IS_TRIGGER_ACTIVE = "isTriggerActive" val IS_DATA_PRESENT_IN_TRIGGER = "isDataPresentInTrigger" val STATUS_MESSAGE = "statusMessage" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala index 99c7729d02351..ba732ff7fc2ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryStatus.scala @@ -102,7 +102,7 @@ class StreamingQueryStatus private( ("inputRate" -> JDouble(inputRate)) ~ ("processingRate" -> JDouble(processingRate)) ~ ("latency" -> latency.map(JDouble).getOrElse(JNothing)) ~ - ("triggerDetails" -> JsonProtocol.mapToJson(triggerDetails.asScala)) + ("triggerDetails" -> JsonProtocol.mapToJson(triggerDetails.asScala)) ~ ("sourceStatuses" -> JArray(sourceStatuses.map(_.jsonValue).toList)) ~ ("sinkStatus" -> sinkStatus.jsonValue) } @@ -151,7 +151,7 @@ private[sql] object StreamingQueryStatus { desc = "MySink", offsetDesc = OffsetSeq(Some(LongOffset(1)) :: None :: Nil).toString), triggerDetails = Map( - TRIGGER_ID -> "5", + BATCH_ID -> "5", IS_TRIGGER_ACTIVE -> "true", IS_DATA_PRESENT_IN_TRIGGER -> "true", GET_OFFSET_LATENCY -> "10", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetricsSuite.scala index 938423db64745..38c4ece439770 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/StreamMetricsSuite.scala @@ -50,10 +50,10 @@ class StreamMetricsSuite extends SparkFunSuite { assert(sm.currentSourceProcessingRate(source) === 
0.0) assert(sm.currentLatency() === None) assert(sm.currentTriggerDetails() === - Map(TRIGGER_ID -> "1", IS_TRIGGER_ACTIVE -> "true", + Map(BATCH_ID -> "1", IS_TRIGGER_ACTIVE -> "true", START_TIMESTAMP -> "0", "key" -> "value")) assert(sm.currentSourceTriggerDetails(source) === - Map(TRIGGER_ID -> "1", "key2" -> "value2")) + Map(BATCH_ID -> "1", "key2" -> "value2")) // Finishing the trigger should calculate the rates, except input rate which needs // to have another trigger interval @@ -66,11 +66,11 @@ class StreamMetricsSuite extends SparkFunSuite { assert(sm.currentSourceProcessingRate(source) === 100.0) assert(sm.currentLatency() === None) assert(sm.currentTriggerDetails() === - Map(TRIGGER_ID -> "1", IS_TRIGGER_ACTIVE -> "false", + Map(BATCH_ID -> "1", IS_TRIGGER_ACTIVE -> "false", START_TIMESTAMP -> "0", FINISH_TIMESTAMP -> "1000", NUM_INPUT_ROWS -> "100", "key" -> "value")) assert(sm.currentSourceTriggerDetails(source) === - Map(TRIGGER_ID -> "1", NUM_SOURCE_INPUT_ROWS -> "100", "key2" -> "value2")) + Map(BATCH_ID -> "1", NUM_SOURCE_INPUT_ROWS -> "100", "key2" -> "value2")) // After another trigger starts, the rates and latencies should not change until // new rows are reported diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala index cebb32a0a56cc..98f3bec7080af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala @@ -84,7 +84,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { AssertOnLastQueryStatus { status: StreamingQueryStatus => // Check the correctness of the trigger info of the last completed batch reported by // onQueryProgress - assert(status.triggerDetails.containsKey("triggerId")) + assert(status.triggerDetails.containsKey("batchId")) assert(status.triggerDetails.get("isTriggerActive") === "false") assert(status.triggerDetails.get("isDataPresentInTrigger") === "true") @@ -104,7 +104,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { assert(status.triggerDetails.get("numRows.state.aggregation1.updated") === "1") assert(status.sourceStatuses.length === 1) - assert(status.sourceStatuses(0).triggerDetails.containsKey("triggerId")) + assert(status.sourceStatuses(0).triggerDetails.containsKey("batchId")) assert(status.sourceStatuses(0).triggerDetails.get("latency.getOffset.source") === "100") assert(status.sourceStatuses(0).triggerDetails.get("latency.getBatch.source") === "200") assert(status.sourceStatuses(0).triggerDetails.get("numRows.input.source") === "2") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusSuite.scala index 6af19fb0c2327..50a7d92ede9a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusSuite.scala @@ -48,12 +48,12 @@ class StreamingQueryStatusSuite extends SparkFunSuite { | Processing rate 23.5 rows/sec | Latency: 345.0 ms | Trigger details: + | batchId: 5 | isDataPresentInTrigger: true | isTriggerActive: true | latency.getBatch.total: 20 | latency.getOffset.total: 10 | numRows.input.total: 100 - | triggerId: 5 | Source statuses [1 source]: | Source 1 - MySource1 | 
Available offset: 0 @@ -72,7 +72,11 @@ class StreamingQueryStatusSuite extends SparkFunSuite { test("json") { assert(StreamingQueryStatus.testStatus.json === """ - |{"sourceStatuses":[{"description":"MySource1","offsetDesc":"0","inputRate":15.5, + |{"name":"query","id":1,"timestamp":123,"inputRate":15.5,"processingRate":23.5, + |"latency":345.0,"triggerDetails":{"latency.getBatch.total":"20", + |"numRows.input.total":"100","isTriggerActive":"true","batchId":"5", + |"latency.getOffset.total":"10","isDataPresentInTrigger":"true"}, + |"sourceStatuses":[{"description":"MySource1","offsetDesc":"0","inputRate":15.5, |"processingRate":23.5,"triggerDetails":{"numRows.input.source":"100", |"latency.getOffset.source":"10","latency.getBatch.source":"20"}}], |"sinkStatus":{"description":"MySink","offsetDesc":"[1, -]"}} @@ -84,6 +88,20 @@ class StreamingQueryStatusSuite extends SparkFunSuite { StreamingQueryStatus.testStatus.prettyJson === """ |{ + | "name" : "query", + | "id" : 1, + | "timestamp" : 123, + | "inputRate" : 15.5, + | "processingRate" : 23.5, + | "latency" : 345.0, + | "triggerDetails" : { + | "latency.getBatch.total" : "20", + | "numRows.input.total" : "100", + | "isTriggerActive" : "true", + | "batchId" : "5", + | "latency.getOffset.total" : "10", + | "isDataPresentInTrigger" : "true" + | }, | "sourceStatuses" : [ { | "description" : "MySource1", | "offsetDesc" : "0", From 3d4756d56b852dcf4e1bebe621d4a30570873c3c Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Wed, 16 Nov 2016 11:03:10 -0800 Subject: [PATCH 0132/1204] [SPARK-18461][DOCS][STRUCTUREDSTREAMING] Added more information about monitoring streaming queries ## What changes were proposed in this pull request? screen shot 2016-11-15 at 6 27 32 pm screen shot 2016-11-15 at 6 27 45 pm Author: Tathagata Das Closes #15897 from tdas/SPARK-18461. (cherry picked from commit bb6cdfd9a6a6b6c91aada7c3174436146045ed1e) Signed-off-by: Michael Armbrust --- .../structured-streaming-programming-guide.md | 182 +++++++++++++++++- 1 file changed, 179 insertions(+), 3 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index d2545584ae3b0..77b66b3b3a497 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1087,9 +1087,185 @@ spark.streams().awaitAnyTermination() # block until any one of them terminates
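A side note on the SPARK-18459 and SPARK-18460 changes above, before the SPARK-18461 documentation diff continues: after the rename, the per-trigger details map is keyed by `batchId` rather than `triggerId`, and the map is now part of the JSON output. The sketch below is not part of the patch series; it only illustrates reading those keys from PySpark using the names visible in the diffs. The text source, memory sink, query name and the sleep are assumptions made for illustration, and the map may still be empty if no trigger has completed yet.

```python
import tempfile
import time

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("trigger-details-sketch").getOrCreate()

# Start a trivial streaming query: watch an (empty) directory, write to a memory sink.
input_dir = tempfile.mkdtemp()
lines = spark.readStream.text(input_dir)
query = lines.writeStream.format("memory").queryName("details_sketch").start()

time.sleep(5)  # give the query a chance to complete at least one trigger

# triggerDetails is a Java map exposed through Py4J; keys and values are strings.
details = query.status.triggerDetails
print(details["batchId"])          # renamed from "triggerId" by SPARK-18459
print(details["isTriggerActive"])

query.stop()
spark.stop()
```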
-Finally, for asynchronous monitoring of streaming queries, you can create and attach a `StreamingQueryListener` -([Scala](api/scala/index.html#org.apache.spark.sql.streaming.StreamingQueryListener)/[Java](api/java/org/apache/spark/sql/streaming/StreamingQueryListener.html) docs), -which will give you regular callback-based updates when queries are started and terminated. + +## Monitoring Streaming Queries +There are two ways you can monitor queries. You can directly get the current status +of an active query using `streamingQuery.status`, which will return a `StreamingQueryStatus` object +([Scala](api/scala/index.html#org.apache.spark.sql.streaming.StreamingQueryStatus)/[Java](api/java/org/apache/spark/sql/streaming/StreamingQueryStatus.html)/[Python](api/python/pyspark.sql.html#pyspark.sql.streaming.StreamingQueryStatus) docs) +that has all the details like current ingestion rates, processing rates, average latency, +details of the currently active trigger, etc. + +
+
+ +{% highlight scala %} +val query: StreamingQuery = ... + +println(query.status) + +/* Will print the current status of the query + +Status of query 'queryName' + Query id: 1 + Status timestamp: 123 + Input rate: 15.5 rows/sec + Processing rate 23.5 rows/sec + Latency: 345.0 ms + Trigger details: + batchId: 5 + isDataPresentInTrigger: true + isTriggerActive: true + latency.getBatch.total: 20 + latency.getOffset.total: 10 + numRows.input.total: 100 + Source statuses [1 source]: + Source 1 - MySource1 + Available offset: 0 + Input rate: 15.5 rows/sec + Processing rate: 23.5 rows/sec + Trigger details: + numRows.input.source: 100 + latency.getOffset.source: 10 + latency.getBatch.source: 20 + Sink status - MySink + Committed offsets: [1, -] +*/ +{% endhighlight %} + +
+
+ +{% highlight java %} +StreamingQuery query = ... + +System.out.println(query.status); + +/* Will print the current status of the query + +Status of query 'queryName' + Query id: 1 + Status timestamp: 123 + Input rate: 15.5 rows/sec + Processing rate 23.5 rows/sec + Latency: 345.0 ms + Trigger details: + batchId: 5 + isDataPresentInTrigger: true + isTriggerActive: true + latency.getBatch.total: 20 + latency.getOffset.total: 10 + numRows.input.total: 100 + Source statuses [1 source]: + Source 1 - MySource1 + Available offset: 0 + Input rate: 15.5 rows/sec + Processing rate: 23.5 rows/sec + Trigger details: + numRows.input.source: 100 + latency.getOffset.source: 10 + latency.getBatch.source: 20 + Sink status - MySink + Committed offsets: [1, -] +*/ +{% endhighlight %} + +
+
+ +{% highlight python %} +query = ... // a StreamingQuery + +print(query.status) + +''' +Will print the current status of the query + +Status of query 'queryName' + Query id: 1 + Status timestamp: 123 + Input rate: 15.5 rows/sec + Processing rate 23.5 rows/sec + Latency: 345.0 ms + Trigger details: + batchId: 5 + isDataPresentInTrigger: true + isTriggerActive: true + latency.getBatch.total: 20 + latency.getOffset.total: 10 + numRows.input.total: 100 + Source statuses [1 source]: + Source 1 - MySource1 + Available offset: 0 + Input rate: 15.5 rows/sec + Processing rate: 23.5 rows/sec + Trigger details: + numRows.input.source: 100 + latency.getOffset.source: 10 + latency.getBatch.source: 20 + Sink status - MySink + Committed offsets: [1, -] +''' +{% endhighlight %} + +
+
+ + +You can also asynchronously monitor all queries associated with a +`SparkSession` by attaching a `StreamingQueryListener` +([Scala](api/scala/index.html#org.apache.spark.sql.streaming.StreamingQueryListener)/[Java](api/java/org/apache/spark/sql/streaming/StreamingQueryListener.html) docs). +Once you attach your custom `StreamingQueryListener` object with +`sparkSession.streams.attachListener()`, you will get callbacks when a query is started and +stopped and when there is progress made in an active query. Here is an example, + +
+
+ +{% highlight scala %} +val spark: SparkSession = ... + +spark.streams.addListener(new StreamingQueryListener() { + + override def onQueryStarted(queryStarted: QueryStartedEvent): Unit = { + println("Query started: " + queryTerminated.queryStatus.name) + } + override def onQueryTerminated(queryTerminated: QueryTerminatedEvent): Unit = { + println("Query terminated: " + queryTerminated.queryStatus.name) + } + override def onQueryProgress(queryProgress: QueryProgressEvent): Unit = { + println("Query made progress: " + queryProgress.queryStatus) + } +}) +{% endhighlight %} + +
+
+ +{% highlight java %} +SparkSession spark = ... + +spark.streams.addListener(new StreamingQueryListener() { + + @Overrides void onQueryStarted(QueryStartedEvent queryStarted) { + System.out.println("Query started: " + queryTerminated.queryStatus.name); + } + @Overrides void onQueryTerminated(QueryTerminatedEvent queryTerminated) { + System.out.println("Query terminated: " + queryTerminated.queryStatus.name); + } + @Overrides void onQueryProgress(QueryProgressEvent queryProgress) { + System.out.println("Query made progress: " + queryProgress.queryStatus); + } +}); +{% endhighlight %} + +
+
+{% highlight bash %} +Not available in Python. +{% endhighlight %} + +
+
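Since the listener API shown above is not exposed in Python at this point, a rough substitute is to poll the status of the active queries periodically. The snippet below is only a sketch, not part of the patch; the application name and the ten second interval are arbitrary, and it relies on `spark.streams.active`, `isActive`, `name` and `status`, which already exist in PySpark.

```python
import time

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("status-polling-sketch").getOrCreate()

# ... start one or more streaming queries here ...

# Poll instead of listening: print each active query's status until all of them stop.
while any(q.isActive for q in spark.streams.active):
    for q in spark.streams.active:
        print("Query '%s':" % q.name)
        print(q.status)
    time.sleep(10)
```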
## Recovering from Failures with Checkpointing In case of a failure or intentional shutdown, you can recover the previous progress and state of a previous query, and continue where it left off. This is done using checkpointing and write ahead logs. You can configure a query with a checkpoint location, and the query will save all the progress information (i.e. range of offsets processed in each trigger) and the running aggregates (e.g. word counts in the [quick example](#quick-example)) to the checkpoint location. As of Spark 2.0, this checkpoint location has to be a path in an HDFS compatible file system, and can be set as an option in the DataStreamWriter when [starting a query](#starting-streaming-queries). From 523abfe19caa11747133877b0c8319c68ac66e56 Mon Sep 17 00:00:00 2001 From: Artur Sukhenko Date: Wed, 16 Nov 2016 15:08:01 -0800 Subject: [PATCH 0133/1204] [YARN][DOC] Increasing NodeManager's heap size with External Shuffle Service ## What changes were proposed in this pull request? Suggest users to increase `NodeManager's` heap size if `External Shuffle Service` is enabled as `NM` can spend a lot of time doing GC resulting in shuffle operations being a bottleneck due to `Shuffle Read blocked time` bumped up. Also because of GC `NodeManager` can use an enormous amount of CPU and cluster performance will suffer. I have seen NodeManager using 5-13G RAM and up to 2700% CPU with `spark_shuffle` service on. ## How was this patch tested? #### Added step 5: ![shuffle_service](https://cloud.githubusercontent.com/assets/15244468/20355499/2fec0fde-ac2a-11e6-8f8b-1c80daf71be1.png) Author: Artur Sukhenko Closes #15906 from Devian-ua/nmHeapSize. (cherry picked from commit 55589987be89ff78dadf44498352fbbd811a206e) Signed-off-by: Reynold Xin --- docs/running-on-yarn.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index cd18808681ece..fe0221ce7c5b6 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -559,6 +559,8 @@ pre-packaged distribution. 1. In the `yarn-site.xml` on each node, add `spark_shuffle` to `yarn.nodemanager.aux-services`, then set `yarn.nodemanager.aux-services.spark_shuffle.class` to `org.apache.spark.network.yarn.YarnShuffleService`. +1. Increase `NodeManager's` heap size by setting `YARN_HEAPSIZE` (1000 by default) in `etc/hadoop/yarn-env.sh` +to avoid garbage collection issues during shuffle. 1. Restart all `NodeManager`s in your cluster. The following extra configuration options are available when the shuffle service is running on YARN: From 9515793820c7954d82116238a67e632ea3e783b5 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 17 Nov 2016 11:21:08 +0800 Subject: [PATCH 0134/1204] [SPARK-18442][SQL] Fix nullability of WrapOption. ## What changes were proposed in this pull request? The nullability of `WrapOption` should be `false`. ## How was this patch tested? Existing tests. Author: Takuya UESHIN Closes #15887 from ueshin/issues/SPARK-18442. 
(cherry picked from commit 170eeb345f951de89a39fe565697b3e913011768) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 50e2ac3c36d93..0e3d99127ed56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -341,7 +341,7 @@ case class WrapOption(child: Expression, optType: DataType) override def dataType: DataType = ObjectType(classOf[Option[_]]) - override def nullable: Boolean = true + override def nullable: Boolean = false override def inputTypes: Seq[AbstractDataType] = optType :: Nil From 6a3cbbc037fe631e1b89c46000373dc2ba86a5eb Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 16 Nov 2016 14:22:15 -0800 Subject: [PATCH 0135/1204] [SPARK-1267][SPARK-18129] Allow PySpark to be pip installed ## What changes were proposed in this pull request? This PR aims to provide a pip installable PySpark package. This does a bunch of work to copy the jars over and package them with the Python code (to prevent challenges from trying to use different versions of the Python code with different versions of the JAR). It does not currently publish to PyPI but that is the natural follow up (SPARK-18129). Done: - pip installable on conda [manual tested] - setup.py installed on a non-pip managed system (RHEL) with YARN [manual tested] - Automated testing of this (virtualenv) - packaging and signing with release-build* Possible follow up work: - release-build update to publish to PyPI (SPARK-18128) - figure out who owns the pyspark package name on prod PyPI (is it someone with in the project or should we ask PyPI or should we choose a different name to publish with like ApachePySpark?) - Windows support and or testing ( SPARK-18136 ) - investigate details of wheel caching and see if we can avoid cleaning the wheel cache during our test - consider how we want to number our dev/snapshot versions Explicitly out of scope: - Using pip installed PySpark to start a standalone cluster - Using pip installed PySpark for non-Python Spark programs *I've done some work to test release-build locally but as a non-committer I've just done local testing. ## How was this patch tested? Automated testing with virtualenv, manual testing with conda, a system wide install, and YARN integration. release-build changes tested locally as a non-committer (no testing of upload artifacts to Apache staging websites) Author: Holden Karau Author: Juliet Hougland Author: Juliet Hougland Closes #15659 from holdenk/SPARK-1267-pip-install-pyspark. 
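To make the intent of this packaging change concrete, the snippet below mirrors the check performed by the new `dev/pip-sanity-check.py` added in the diff that follows: after `pip install dist/pyspark-*.tar.gz` in a clean virtualenv, a pip-installed PySpark should locate its bundled jars and run a local job even though `SPARK_HOME` is not set. Treat it as a sketch rather than part of the patch.

```python
from pyspark.sql import SparkSession

# No SPARK_HOME in the environment: the pip-installed package resolves it on its own
# (see python/pyspark/find_spark_home.py in the diff below).
spark = SparkSession.builder.appName("PipSanityCheck").getOrCreate()

rdd = spark.sparkContext.parallelize(range(100), 10)
assert rdd.reduce(lambda x, y: x + y) == 4950
print("pip-installed PySpark works")

spark.stop()
```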
--- .gitignore | 2 + bin/beeline | 2 +- bin/find-spark-home | 41 ++++ bin/load-spark-env.sh | 2 +- bin/pyspark | 6 +- bin/run-example | 2 +- bin/spark-class | 6 +- bin/spark-shell | 4 +- bin/spark-sql | 2 +- bin/spark-submit | 2 +- bin/sparkR | 2 +- dev/create-release/release-build.sh | 26 ++- dev/create-release/release-tag.sh | 11 +- dev/lint-python | 4 +- dev/make-distribution.sh | 16 +- dev/pip-sanity-check.py | 36 +++ dev/run-pip-tests | 115 ++++++++++ dev/run-tests-jenkins.py | 1 + dev/run-tests.py | 7 + dev/sparktestsupport/__init__.py | 1 + docs/building-spark.md | 8 + docs/index.md | 4 +- .../spark/launcher/CommandBuilderUtils.java | 2 +- python/MANIFEST.in | 22 ++ python/README.md | 32 +++ python/pyspark/__init__.py | 1 + python/pyspark/find_spark_home.py | 74 +++++++ python/pyspark/java_gateway.py | 3 +- python/pyspark/version.py | 19 ++ python/setup.cfg | 22 ++ python/setup.py | 209 ++++++++++++++++++ 31 files changed, 660 insertions(+), 24 deletions(-) create mode 100755 bin/find-spark-home create mode 100644 dev/pip-sanity-check.py create mode 100755 dev/run-pip-tests create mode 100644 python/MANIFEST.in create mode 100644 python/README.md create mode 100755 python/pyspark/find_spark_home.py create mode 100644 python/pyspark/version.py create mode 100644 python/setup.cfg create mode 100644 python/setup.py diff --git a/.gitignore b/.gitignore index 39d17e1793f77..5634a434db0c0 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,8 @@ project/plugins/project/build.properties project/plugins/src_managed/ project/plugins/target/ python/lib/pyspark.zip +python/deps +python/pyspark/python reports/ scalastyle-on-compile.generated.xml scalastyle-output.xml diff --git a/bin/beeline b/bin/beeline index 1627626941a73..058534699e44b 100755 --- a/bin/beeline +++ b/bin/beeline @@ -25,7 +25,7 @@ set -o posix # Figure out if SPARK_HOME is set if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi CLASS="org.apache.hive.beeline.BeeLine" diff --git a/bin/find-spark-home b/bin/find-spark-home new file mode 100755 index 0000000000000..fa78407d4175a --- /dev/null +++ b/bin/find-spark-home @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Attempts to find a proper value for SPARK_HOME. Should be included using "source" directive. + +FIND_SPARK_HOME_PYTHON_SCRIPT="$(cd "$(dirname "$0")"; pwd)/find_spark_home.py" + +# Short cirtuit if the user already has this set. +if [ ! -z "${SPARK_HOME}" ]; then + exit 0 +elif [ ! -f "$FIND_SPARK_HOME_PYTHON_SCRIPT" ]; then + # If we are not in the same directory as find_spark_home.py we are not pip installed so we don't + # need to search the different Python directories for a Spark installation. 
+ # Note only that, if the user has pip installed PySpark but is directly calling pyspark-shell or + # spark-submit in another directory we want to use that version of PySpark rather than the + # pip installed version of PySpark. + export SPARK_HOME="$(cd "$(dirname "$0")"/..; pwd)" +else + # We are pip installed, use the Python script to resolve a reasonable SPARK_HOME + # Default to standard python interpreter unless told otherwise + if [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then + PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"python"}" + fi + export SPARK_HOME=$($PYSPARK_DRIVER_PYTHON "$FIND_SPARK_HOME_PYTHON_SCRIPT") +fi diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh index eaea964ed5b3d..8a2f709960a25 100644 --- a/bin/load-spark-env.sh +++ b/bin/load-spark-env.sh @@ -23,7 +23,7 @@ # Figure out where Spark is installed if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi if [ -z "$SPARK_ENV_LOADED" ]; then diff --git a/bin/pyspark b/bin/pyspark index d6b3ab0a44321..98387c2ec5b8a 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -18,7 +18,7 @@ # if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi source "${SPARK_HOME}"/bin/load-spark-env.sh @@ -46,7 +46,7 @@ WORKS_WITH_IPYTHON=$(python -c 'import sys; print(sys.version_info >= (2, 7, 0)) # Determine the Python executable to use for the executors: if [[ -z "$PYSPARK_PYTHON" ]]; then - if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && ! WORKS_WITH_IPYTHON ]]; then + if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && ! $WORKS_WITH_IPYTHON ]]; then echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2 exit 1 else @@ -68,7 +68,7 @@ if [[ -n "$SPARK_TESTING" ]]; then unset YARN_CONF_DIR unset HADOOP_CONF_DIR export PYTHONHASHSEED=0 - exec "$PYSPARK_DRIVER_PYTHON" -m $1 + exec "$PYSPARK_DRIVER_PYTHON" -m "$1" exit fi diff --git a/bin/run-example b/bin/run-example index dd0e3c4120260..4ba5399311d33 100755 --- a/bin/run-example +++ b/bin/run-example @@ -18,7 +18,7 @@ # if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi export _SPARK_CMD_USAGE="Usage: ./bin/run-example [options] example-class [example args]" diff --git a/bin/spark-class b/bin/spark-class index 377c8d1add3f6..77ea40cc37946 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -18,7 +18,7 @@ # if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi . "${SPARK_HOME}"/bin/load-spark-env.sh @@ -27,7 +27,7 @@ fi if [ -n "${JAVA_HOME}" ]; then RUNNER="${JAVA_HOME}/bin/java" else - if [ `command -v java` ]; then + if [ "$(command -v java)" ]; then RUNNER="java" else echo "JAVA_HOME is not set" >&2 @@ -36,7 +36,7 @@ else fi # Find Spark jars. 
-if [ -f "${SPARK_HOME}/RELEASE" ]; then +if [ -d "${SPARK_HOME}/jars" ]; then SPARK_JARS_DIR="${SPARK_HOME}/jars" else SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" diff --git a/bin/spark-shell b/bin/spark-shell index 6583b5bd880ee..421f36cac3d47 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -21,7 +21,7 @@ # Shell script for starting the Spark Shell REPL cygwin=false -case "`uname`" in +case "$(uname)" in CYGWIN*) cygwin=true;; esac @@ -29,7 +29,7 @@ esac set -o posix if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]" diff --git a/bin/spark-sql b/bin/spark-sql index 970d12cbf51dd..b08b944ebd319 100755 --- a/bin/spark-sql +++ b/bin/spark-sql @@ -18,7 +18,7 @@ # if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi export _SPARK_CMD_USAGE="Usage: ./bin/spark-sql [options] [cli option]" diff --git a/bin/spark-submit b/bin/spark-submit index 023f9c162f4b8..4e9d3614e6370 100755 --- a/bin/spark-submit +++ b/bin/spark-submit @@ -18,7 +18,7 @@ # if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi # disable randomized hash for string in Python 3.3+ diff --git a/bin/sparkR b/bin/sparkR index 2c07a82e2173b..29ab10df8ab6d 100755 --- a/bin/sparkR +++ b/bin/sparkR @@ -18,7 +18,7 @@ # if [ -z "${SPARK_HOME}" ]; then - export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)" + source "$(dirname "$0")"/find-spark-home fi source "${SPARK_HOME}"/bin/load-spark-env.sh diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 81f0d63054e29..1dbfa3b6e361b 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -162,14 +162,35 @@ if [[ "$1" == "package" ]]; then export ZINC_PORT=$ZINC_PORT echo "Creating distribution: $NAME ($FLAGS)" + # Write out the NAME and VERSION to PySpark version info we rewrite the - into a . and SNAPSHOT + # to dev0 to be closer to PEP440. We use the NAME as a "local version". + PYSPARK_VERSION=`echo "$SPARK_VERSION+$NAME" | sed -r "s/-/./" | sed -r "s/SNAPSHOT/dev0/"` + echo "__version__='$PYSPARK_VERSION'" > python/pyspark/version.py + # Get maven home set by MVN MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'` - ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz $FLAGS \ + echo "Creating distribution" + ./dev/make-distribution.sh --name $NAME --mvn $MVN_HOME/bin/mvn --tgz --pip $FLAGS \ -DzincPort=$ZINC_PORT 2>&1 > ../binary-release-$NAME.log cd .. - cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz . + echo "Copying and signing python distribution" + PYTHON_DIST_NAME=pyspark-$PYSPARK_VERSION.tar.gz + cp spark-$SPARK_VERSION-bin-$NAME/python/dist/$PYTHON_DIST_NAME . + + echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \ + --output $PYTHON_DIST_NAME.asc \ + --detach-sig $PYTHON_DIST_NAME + echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \ + MD5 $PYTHON_DIST_NAME > \ + $PYTHON_DIST_NAME.md5 + echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \ + SHA512 $PYTHON_DIST_NAME > \ + $PYTHON_DIST_NAME.sha + + echo "Copying and signing regular binary distribution" + cp spark-$SPARK_VERSION-bin-$NAME/spark-$SPARK_VERSION-bin-$NAME.tgz . 
echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \ --output spark-$SPARK_VERSION-bin-$NAME.tgz.asc \ --detach-sig spark-$SPARK_VERSION-bin-$NAME.tgz @@ -208,6 +229,7 @@ if [[ "$1" == "package" ]]; then # Re-upload a second time and leave the files in the timestamped upload directory: LFTP mkdir -p $dest_dir LFTP mput -O $dest_dir 'spark-*' + LFTP mput -O $dest_dir 'pyspark-*' exit 0 fi diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh index b7e5100ca7408..370a62ce15bc4 100755 --- a/dev/create-release/release-tag.sh +++ b/dev/create-release/release-tag.sh @@ -65,6 +65,7 @@ sed -i".tmp1" 's/Version.*$/Version: '"$RELEASE_VERSION"'/g' R/pkg/DESCRIPTION # Set the release version in docs sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml +sed -i".tmp3" 's/__version__ = .*$/__version__ = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py git commit -a -m "Preparing Spark release $RELEASE_TAG" echo "Creating tag $RELEASE_TAG at the head of $GIT_BRANCH" @@ -74,12 +75,16 @@ git tag $RELEASE_TAG $MVN versions:set -DnewVersion=$NEXT_VERSION | grep -v "no value" # silence logs # Remove -SNAPSHOT before setting the R version as R expects version strings to only have numbers R_NEXT_VERSION=`echo $NEXT_VERSION | sed 's/-SNAPSHOT//g'` -sed -i".tmp2" 's/Version.*$/Version: '"$R_NEXT_VERSION"'/g' R/pkg/DESCRIPTION +sed -i".tmp4" 's/Version.*$/Version: '"$R_NEXT_VERSION"'/g' R/pkg/DESCRIPTION +# Write out the R_NEXT_VERSION to PySpark version info we use dev0 instead of SNAPSHOT to be closer +# to PEP440. +sed -i".tmp5" 's/__version__ = .*$/__version__ = "'"$R_NEXT_VERSION.dev0"'"/' python/pyspark/version.py + # Update docs with next version -sed -i".tmp3" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$NEXT_VERSION"'/g' docs/_config.yml +sed -i".tmp6" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$NEXT_VERSION"'/g' docs/_config.yml # Use R version for short version -sed -i".tmp4" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$R_NEXT_VERSION"'/g' docs/_config.yml +sed -i".tmp7" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$R_NEXT_VERSION"'/g' docs/_config.yml git commit -a -m "Preparing development version $NEXT_VERSION" diff --git a/dev/lint-python b/dev/lint-python index 63487043a50b6..3f878c2dad6b1 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -20,7 +20,9 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")" PATHS_TO_CHECK="./python/pyspark/ ./examples/src/main/python/ ./dev/sparktestsupport" -PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/run-tests.py ./dev/run-tests-jenkins.py" +# TODO: fix pep8 errors with the rest of the Python scripts under dev +PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/run-tests.py ./python/*.py ./dev/run-tests-jenkins.py" +PATHS_TO_CHECK="$PATHS_TO_CHECK ./dev/pip-sanity-check.py" PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt" PYLINT_REPORT_PATH="$SPARK_ROOT_DIR/dev/pylint-report.txt" PYLINT_INSTALL_INFO="$SPARK_ROOT_DIR/dev/pylint-info.txt" diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 9be4fdfa51c93..49b46fbc3fb27 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -33,6 +33,7 @@ SPARK_HOME="$(cd "`dirname "$0"`/.."; pwd)" DISTDIR="$SPARK_HOME/dist" MAKE_TGZ=false +MAKE_PIP=false NAME=none MVN="$SPARK_HOME/build/mvn" @@ -40,7 +41,7 @@ function exit_with_usage { echo "make-distribution.sh - tool for 
making binary distributions of Spark" echo "" echo "usage:" - cl_options="[--name] [--tgz] [--mvn ]" + cl_options="[--name] [--tgz] [--pip] [--mvn ]" echo "make-distribution.sh $cl_options " echo "See Spark's \"Building Spark\" doc for correct Maven options." echo "" @@ -67,6 +68,9 @@ while (( "$#" )); do --tgz) MAKE_TGZ=true ;; + --pip) + MAKE_PIP=true + ;; --mvn) MVN="$2" shift @@ -201,6 +205,16 @@ fi # Copy data files cp -r "$SPARK_HOME/data" "$DISTDIR" +# Make pip package +if [ "$MAKE_PIP" == "true" ]; then + echo "Building python distribution package" + cd $SPARK_HOME/python + python setup.py sdist + cd .. +else + echo "Skipping creating pip installable PySpark" +fi + # Copy other things mkdir "$DISTDIR"/conf cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py new file mode 100644 index 0000000000000..430c2ab52766a --- /dev/null +++ b/dev/pip-sanity-check.py @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function + +from pyspark.sql import SparkSession +import sys + +if __name__ == "__main__": + spark = SparkSession\ + .builder\ + .appName("PipSanityCheck")\ + .getOrCreate() + sc = spark.sparkContext + rdd = sc.parallelize(range(100), 10) + value = rdd.reduce(lambda x, y: x + y) + if (value != 4950): + print("Value {0} did not match expected value.".format(value), file=sys.stderr) + sys.exit(-1) + print("Successfully ran pip sanity check") + + spark.stop() diff --git a/dev/run-pip-tests b/dev/run-pip-tests new file mode 100755 index 0000000000000..e1da18e60bb3d --- /dev/null +++ b/dev/run-pip-tests @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Stop on error +set -e +# Set nullglob for when we are checking existence based on globs +shopt -s nullglob + +FWDIR="$(cd "$(dirname "$0")"/..; pwd)" +cd "$FWDIR" + +echo "Constucting virtual env for testing" +VIRTUALENV_BASE=$(mktemp -d) + +# Clean up the virtual env enviroment used if we created one. 
+function delete_virtualenv() { + echo "Cleaning up temporary directory - $VIRTUALENV_BASE" + rm -rf "$VIRTUALENV_BASE" +} +trap delete_virtualenv EXIT + +# Some systems don't have pip or virtualenv - in those cases our tests won't work. +if ! hash virtualenv 2>/dev/null; then + echo "Missing virtualenv skipping pip installability tests." + exit 0 +fi +if ! hash pip 2>/dev/null; then + echo "Missing pip, skipping pip installability tests." + exit 0 +fi + +# Figure out which Python execs we should test pip installation with +PYTHON_EXECS=() +if hash python2 2>/dev/null; then + # We do this since we are testing with virtualenv and the default virtual env python + # is in /usr/bin/python + PYTHON_EXECS+=('python2') +elif hash python 2>/dev/null; then + # If python2 isn't installed fallback to python if available + PYTHON_EXECS+=('python') +fi +if hash python3 2>/dev/null; then + PYTHON_EXECS+=('python3') +fi + +# Determine which version of PySpark we are building for archive name +PYSPARK_VERSION=$(python -c "exec(open('python/pyspark/version.py').read());print __version__") +PYSPARK_DIST="$FWDIR/python/dist/pyspark-$PYSPARK_VERSION.tar.gz" +# The pip install options we use for all the pip commands +PIP_OPTIONS="--upgrade --no-cache-dir --force-reinstall " +# Test both regular user and edit/dev install modes. +PIP_COMMANDS=("pip install $PIP_OPTIONS $PYSPARK_DIST" + "pip install $PIP_OPTIONS -e python/") + +for python in "${PYTHON_EXECS[@]}"; do + for install_command in "${PIP_COMMANDS[@]}"; do + echo "Testing pip installation with python $python" + # Create a temp directory for us to work in and save its name to a file for cleanup + echo "Using $VIRTUALENV_BASE for virtualenv" + VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python + rm -rf "$VIRTUALENV_PATH" + mkdir -p "$VIRTUALENV_PATH" + virtualenv --python=$python "$VIRTUALENV_PATH" + source "$VIRTUALENV_PATH"/bin/activate + # Upgrade pip + pip install --upgrade pip + + echo "Creating pip installable source dist" + cd "$FWDIR"/python + $python setup.py sdist + + + echo "Installing dist into virtual env" + cd dist + # Verify that the dist directory only contains one thing to install + sdists=(*.tar.gz) + if [ ${#sdists[@]} -ne 1 ]; then + echo "Unexpected number of targets found in dist directory - please cleanup existing sdists first." 
+ exit -1 + fi + # Do the actual installation + cd "$FWDIR" + $install_command + + cd / + + echo "Run basic sanity check on pip installed version with spark-submit" + spark-submit "$FWDIR"/dev/pip-sanity-check.py + echo "Run basic sanity check with import based" + python "$FWDIR"/dev/pip-sanity-check.py + echo "Run the tests for context.py" + python "$FWDIR"/python/pyspark/context.py + + cd "$FWDIR" + + done +done + +exit 0 diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index a48d918f9dc1f..1d1e72faccf2a 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -128,6 +128,7 @@ def run_tests(tests_timeout): ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests', ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests', ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests', + ERROR_CODES["BLOCK_PYSPARK_PIP_TESTS"]: 'PySpark pip packaging tests', ERROR_CODES["BLOCK_SPARKR_UNIT_TESTS"]: 'SparkR unit tests', ERROR_CODES["BLOCK_TIMEOUT"]: 'from timeout after a configured wait of \`%s\`' % ( tests_timeout) diff --git a/dev/run-tests.py b/dev/run-tests.py index 5d661f5f1a1c5..ab285ac96af7e 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -432,6 +432,12 @@ def run_python_tests(test_modules, parallelism): run_cmd(command) +def run_python_packaging_tests(): + set_title_and_block("Running PySpark packaging tests", "BLOCK_PYSPARK_PIP_TESTS") + command = [os.path.join(SPARK_HOME, "dev", "run-pip-tests")] + run_cmd(command) + + def run_build_tests(): set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS") run_cmd([os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")]) @@ -583,6 +589,7 @@ def main(): modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: run_python_tests(modules_with_python_tests, opts.parallelism) + run_python_packaging_tests() if any(m.should_run_r_tests for m in test_modules): run_sparkr_tests() diff --git a/dev/sparktestsupport/__init__.py b/dev/sparktestsupport/__init__.py index 89015f8c4fb9c..38f25da41f775 100644 --- a/dev/sparktestsupport/__init__.py +++ b/dev/sparktestsupport/__init__.py @@ -33,5 +33,6 @@ "BLOCK_SPARKR_UNIT_TESTS": 20, "BLOCK_JAVA_STYLE": 21, "BLOCK_BUILD_TESTS": 22, + "BLOCK_PYSPARK_PIP_TESTS": 23, "BLOCK_TIMEOUT": 124 } diff --git a/docs/building-spark.md b/docs/building-spark.md index 2b404bd3e116c..88da0cc9c3bbf 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -265,6 +265,14 @@ or Java 8 tests are automatically enabled when a Java 8 JDK is detected. If you have JDK 8 installed but it is not the system default, you can set JAVA_HOME to point to JDK 8 before running the tests. +## PySpark pip installable + +If you are building Spark for use in a Python environment and you wish to pip install it, you will first need to build the Spark JARs as described above. Then you can construct an sdist package suitable for setup.py and pip installable package. + + cd python; python setup.py sdist + +**Note:** Due to packaging requirements you can not directly pip install from the Python directory, rather you must first build the sdist package as described above. + ## PySpark Tests with Maven If you are building PySpark and wish to run the PySpark tests you will need to build Spark with Hive support. 
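As a complement to the build notes above, a quick way to confirm which PySpark a Python environment resolves after installing the sdist is to inspect the version string and the resolved SPARK_HOME that this patch introduces. This is an illustrative sketch, not part of the patch; the module and function names are taken from the diff, and `_find_spark_home` is an internal helper rather than a public API.

```python
import pyspark
from pyspark.find_spark_home import _find_spark_home

# __version__ is provided by the new python/pyspark/version.py (e.g. "2.1.0.dev0").
print(pyspark.__version__)

# _find_spark_home() is the fallback used by bin/find-spark-home and
# pyspark/java_gateway.py when SPARK_HOME is not already set in the environment.
print(_find_spark_home())
```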
diff --git a/docs/index.md b/docs/index.md index fe51439ae08d7..39de11de854a7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -14,7 +14,9 @@ It also supports a rich set of higher-level tools including [Spark SQL](sql-prog Get Spark from the [downloads page](http://spark.apache.org/downloads.html) of the project website. This documentation is for Spark version {{site.SPARK_VERSION}}. Spark uses Hadoop's client libraries for HDFS and YARN. Downloads are pre-packaged for a handful of popular Hadoop versions. Users can also download a "Hadoop free" binary and run Spark with any Hadoop version -[by augmenting Spark's classpath](hadoop-provided.html). +[by augmenting Spark's classpath](hadoop-provided.html). +Scala and Java users can include Spark in their projects using its maven cooridnates and in the future Python users can also install Spark from PyPI. + If you'd like to build Spark from source, visit [Building Spark](building-spark.html). diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java index 62a22008d0d5d..250b2a882feb5 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java +++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java @@ -357,7 +357,7 @@ static int javaMajorVersion(String javaVersion) { static String findJarsDir(String sparkHome, String scalaVersion, boolean failIfNotFound) { // TODO: change to the correct directory once the assembly build is changed. File libdir; - if (new File(sparkHome, "RELEASE").isFile()) { + if (new File(sparkHome, "jars").isDirectory()) { libdir = new File(sparkHome, "jars"); checkState(!failIfNotFound || libdir.isDirectory(), "Library directory '%s' does not exist.", diff --git a/python/MANIFEST.in b/python/MANIFEST.in new file mode 100644 index 0000000000000..bbcce1baa439d --- /dev/null +++ b/python/MANIFEST.in @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +global-exclude *.py[cod] __pycache__ .DS_Store +recursive-include deps/jars *.jar +graft deps/bin +recursive-include deps/examples *.py +recursive-include lib *.zip +include README.md diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000000000..0a5c8010b8486 --- /dev/null +++ b/python/README.md @@ -0,0 +1,32 @@ +# Apache Spark + +Spark is a fast and general cluster computing system for Big Data. It provides +high-level APIs in Scala, Java, Python, and R, and an optimized engine that +supports general computation graphs for data analysis. It also supports a +rich set of higher-level tools including Spark SQL for SQL and DataFrames, +MLlib for machine learning, GraphX for graph processing, +and Spark Streaming for stream processing. 
+ + + +## Online Documentation + +You can find the latest Spark documentation, including a programming +guide, on the [project web page](http://spark.apache.org/documentation.html) + + +## Python Packaging + +This README file only contains basic information related to pip installed PySpark. +This packaging is currently experimental and may change in future versions (although we will do our best to keep compatibility). +Using PySpark requires the Spark JARs, and if you are building this from source please see the builder instructions at +["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html). + +The Python packaging for Spark is not intended to replace all of the other use cases. This Python packaged version of Spark is suitable for interacting with an existing cluster (be it Spark standalone, YARN, or Mesos) - but does not contain the tools required to setup your own standalone Spark cluster. You can download the full version of Spark from the [Apache Spark downloads page](http://spark.apache.org/downloads.html). + + +**NOTE:** If you are using this with a Spark standalone cluster you must ensure that the version (including minor version) matches or you may experience odd errors. + +## Python Requirements + +At its core PySpark depends on Py4J (currently version 0.10.4), but additional sub-packages have their own requirements (including numpy and pandas). \ No newline at end of file diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index ec1687415a7f6..5f93586a48a5a 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -50,6 +50,7 @@ from pyspark.serializers import MarshalSerializer, PickleSerializer from pyspark.status import * from pyspark.profiler import Profiler, BasicProfiler +from pyspark.version import __version__ def since(version): diff --git a/python/pyspark/find_spark_home.py b/python/pyspark/find_spark_home.py new file mode 100755 index 0000000000000..212a618b767ab --- /dev/null +++ b/python/pyspark/find_spark_home.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script attempt to determine the correct setting for SPARK_HOME given +# that Spark may have been installed on the system with pip. + +from __future__ import print_function +import os +import sys + + +def _find_spark_home(): + """Find the SPARK_HOME.""" + # If the enviroment has SPARK_HOME set trust it. 
+ if "SPARK_HOME" in os.environ: + return os.environ["SPARK_HOME"] + + def is_spark_home(path): + """Takes a path and returns true if the provided path could be a reasonable SPARK_HOME""" + return (os.path.isfile(os.path.join(path, "bin/spark-submit")) and + (os.path.isdir(os.path.join(path, "jars")) or + os.path.isdir(os.path.join(path, "assembly")))) + + paths = ["../", os.path.dirname(os.path.realpath(__file__))] + + # Add the path of the PySpark module if it exists + if sys.version < "3": + import imp + try: + module_home = imp.find_module("pyspark")[1] + paths.append(module_home) + # If we are installed in edit mode also look two dirs up + paths.append(os.path.join(module_home, "../../")) + except ImportError: + # Not pip installed no worries + pass + else: + from importlib.util import find_spec + try: + module_home = os.path.dirname(find_spec("pyspark").origin) + paths.append(module_home) + # If we are installed in edit mode also look two dirs up + paths.append(os.path.join(module_home, "../../")) + except ImportError: + # Not pip installed no worries + pass + + # Normalize the paths + paths = [os.path.abspath(p) for p in paths] + + try: + return next(path for path in paths if is_spark_home(path)) + except StopIteration: + print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr) + exit(-1) + +if __name__ == "__main__": + print(_find_spark_home()) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index c1cf843d84388..3c783ae541a1f 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -29,6 +29,7 @@ xrange = range from py4j.java_gateway import java_import, JavaGateway, GatewayClient +from pyspark.find_spark_home import _find_spark_home from pyspark.serializers import read_int @@ -41,7 +42,7 @@ def launch_gateway(conf=None): if "PYSPARK_GATEWAY_PORT" in os.environ: gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"]) else: - SPARK_HOME = os.environ["SPARK_HOME"] + SPARK_HOME = _find_spark_home() # Launch the Py4j gateway using Spark's run command so that we pick up the # proper classpath and settings from spark-env.sh on_windows = platform.system() == "Windows" diff --git a/python/pyspark/version.py b/python/pyspark/version.py new file mode 100644 index 0000000000000..08a301695fda7 --- /dev/null +++ b/python/pyspark/version.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "2.1.0.dev0" diff --git a/python/setup.cfg b/python/setup.cfg new file mode 100644 index 0000000000000..d100b932bbafc --- /dev/null +++ b/python/setup.cfg @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +[bdist_wheel] +universal = 1 + +[metadata] +description-file = README.md diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 0000000000000..625aea04073f5 --- /dev/null +++ b/python/setup.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import glob +import os +import sys +from setuptools import setup, find_packages +from shutil import copyfile, copytree, rmtree + +if sys.version_info < (2, 7): + print("Python versions prior to 2.7 are not supported for pip installed PySpark.", + file=sys.stderr) + exit(-1) + +try: + exec(open('pyspark/version.py').read()) +except IOError: + print("Failed to load PySpark version file for packaging. You must be in Spark's python dir.", + file=sys.stderr) + sys.exit(-1) +VERSION = __version__ +# A temporary path so we can access above the Python project root and fetch scripts and jars we need +TEMP_PATH = "deps" +SPARK_HOME = os.path.abspath("../") + +# Provide guidance about how to use setup.py +incorrect_invocation_message = """ +If you are installing pyspark from spark source, you must first build Spark and +run sdist. + + To build Spark with maven you can run: + ./build/mvn -DskipTests clean package + Building the source dist is done in the Python directory: + cd python + python setup.py sdist + pip install dist/*.tar.gz""" + +# Figure out where the jars are we need to package with PySpark. 
+JARS_PATH = glob.glob(os.path.join(SPARK_HOME, "assembly/target/scala-*/jars/")) + +if len(JARS_PATH) == 1: + JARS_PATH = JARS_PATH[0] +elif (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1): + # Release mode puts the jars in a jars directory + JARS_PATH = os.path.join(SPARK_HOME, "jars") +elif len(JARS_PATH) > 1: + print("Assembly jars exist for multiple scalas ({0}), please cleanup assembly/target".format( + JARS_PATH), file=sys.stderr) + sys.exit(-1) +elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH): + print(incorrect_invocation_message, file=sys.stderr) + sys.exit(-1) + +EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python") +SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin") +SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin") +JARS_TARGET = os.path.join(TEMP_PATH, "jars") +EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples") + + +# Check and see if we are under the spark path in which case we need to build the symlink farm. +# This is important because we only want to build the symlink farm while under Spark otherwise we +# want to use the symlink farm. And if the symlink farm exists under while under Spark (e.g. a +# partially built sdist) we should error and have the user sort it out. +in_spark = (os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or + (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1)) + + +def _supports_symlinks(): + """Check if the system supports symlinks (e.g. *nix) or not.""" + return getattr(os, "symlink", None) is not None + + +if (in_spark): + # Construct links for setup + try: + os.mkdir(TEMP_PATH) + except: + print("Temp path for symlink to parent already exists {0}".format(TEMP_PATH), + file=sys.stderr) + exit(-1) + +try: + # We copy the shell script to be under pyspark/python/pyspark so that the launcher scripts + # find it where expected. The rest of the files aren't copied because they are accessed + # using Python imports instead which will be resolved correctly. + try: + os.makedirs("pyspark/python/pyspark") + except OSError: + # Don't worry if the directory already exists. + pass + copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py") + + if (in_spark): + # Construct the symlink farm - this is necessary since we can't refer to the path above the + # package root and we need to copy the jars and scripts which are up above the python root. + if _supports_symlinks(): + os.symlink(JARS_PATH, JARS_TARGET) + os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET) + os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET) + else: + # For windows fall back to the slower copytree + copytree(JARS_PATH, JARS_TARGET) + copytree(SCRIPTS_PATH, SCRIPTS_TARGET) + copytree(EXAMPLES_PATH, EXAMPLES_TARGET) + else: + # If we are not inside of SPARK_HOME verify we have the required symlink farm + if not os.path.exists(JARS_TARGET): + print("To build packaging must be in the python directory under the SPARK_HOME.", + file=sys.stderr) + + if not os.path.isdir(SCRIPTS_TARGET): + print(incorrect_invocation_message, file=sys.stderr) + exit(-1) + + # Scripts directive requires a list of each script path and does not take wild cards. + script_names = os.listdir(SCRIPTS_TARGET) + scripts = list(map(lambda script: os.path.join(SCRIPTS_TARGET, script), script_names)) + # We add find_spark_home.py to the bin directory we install so that pip installed PySpark + # will search for SPARK_HOME with Python. 
+ scripts.append("pyspark/find_spark_home.py") + + # Parse the README markdown file into rst for PyPI + long_description = "!!!!! missing pandoc do not upload to PyPI !!!!" + try: + import pypandoc + long_description = pypandoc.convert('README.md', 'rst') + except ImportError: + print("Could not import pypandoc - required to package PySpark", file=sys.stderr) + + setup( + name='pyspark', + version=VERSION, + description='Apache Spark Python API', + long_description=long_description, + author='Spark Developers', + author_email='dev@spark.apache.org', + url='https://github.com/apache/spark/tree/master/python', + packages=['pyspark', + 'pyspark.mllib', + 'pyspark.ml', + 'pyspark.sql', + 'pyspark.streaming', + 'pyspark.bin', + 'pyspark.jars', + 'pyspark.python.pyspark', + 'pyspark.python.lib', + 'pyspark.examples.src.main.python'], + include_package_data=True, + package_dir={ + 'pyspark.jars': 'deps/jars', + 'pyspark.bin': 'deps/bin', + 'pyspark.python.lib': 'lib', + 'pyspark.examples.src.main.python': 'deps/examples', + }, + package_data={ + 'pyspark.jars': ['*.jar'], + 'pyspark.bin': ['*'], + 'pyspark.python.lib': ['*.zip'], + 'pyspark.examples.src.main.python': ['*.py', '*/*.py']}, + scripts=scripts, + license='http://www.apache.org/licenses/LICENSE-2.0', + install_requires=['py4j==0.10.4'], + setup_requires=['pypandoc'], + extras_require={ + 'ml': ['numpy>=1.7'], + 'mllib': ['numpy>=1.7'], + 'sql': ['pandas'] + }, + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy'] + ) +finally: + # We only cleanup the symlink farm if we were in Spark, otherwise we are installing rather than + # packaging. + if (in_spark): + # Depending on cleaning up the symlink farm or copied version + if _supports_symlinks(): + os.remove(os.path.join(TEMP_PATH, "jars")) + os.remove(os.path.join(TEMP_PATH, "bin")) + os.remove(os.path.join(TEMP_PATH, "examples")) + else: + rmtree(os.path.join(TEMP_PATH, "jars")) + rmtree(os.path.join(TEMP_PATH, "bin")) + rmtree(os.path.join(TEMP_PATH, "examples")) + os.rmdir(TEMP_PATH) From 014fceee04c69d7944c74b3794e821e4d1003dd0 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 17 Nov 2016 00:00:38 -0800 Subject: [PATCH 0136/1204] [SPARK-18464][SQL] support old table which doesn't store schema in metastore ## What changes were proposed in this pull request? Before Spark 2.1, users can create an external data source table without schema, and we will infer the table schema at runtime. In Spark 2.1, we decided to infer the schema when the table was created, so that we don't need to infer it again and again at runtime. This is a good improvement, but we should still respect and support old tables which doesn't store table schema in metastore. ## How was this patch tested? regression test. Author: Wenchen Fan Closes #15900 from cloud-fan/hive-catalog. 
(cherry picked from commit 07b3f045cd6f79b92bc86b3b1b51d3d5e6bd37ce) Signed-off-by: Reynold Xin --- .../spark/sql/execution/command/tables.scala | 8 ++++++- .../spark/sql/hive/HiveExternalCatalog.scala | 5 +++++ .../spark/sql/hive/HiveMetastoreCatalog.scala | 4 +++- .../sql/hive/MetastoreDataSourcesSuite.scala | 22 +++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 119e732d0202c..7049e53a78684 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -431,7 +431,13 @@ case class DescribeTableCommand( describeSchema(catalog.lookupRelation(table).schema, result) } else { val metadata = catalog.getTableMetadata(table) - describeSchema(metadata.schema, result) + if (metadata.schema.isEmpty) { + // In older version(prior to 2.1) of Spark, the table schema can be empty and should be + // inferred at runtime. We should still support it. + describeSchema(catalog.lookupRelation(metadata.identifier).schema, result) + } else { + describeSchema(metadata.schema, result) + } describePartitionInfo(metadata, result) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index cbd00da81cfcd..843305883abc8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -1023,6 +1023,11 @@ object HiveExternalCatalog { // After SPARK-6024, we removed this flag. // Although we are not using `spark.sql.sources.schema` any more, we need to still support. DataType.fromJson(schema.get).asInstanceOf[StructType] + } else if (props.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) { + // If there is no schema information in table properties, it means the schema of this table + // was empty when saving into metastore, which is possible in older version(prior to 2.1) of + // Spark. We should respect it. + new StructType() } else { val numSchemaParts = props.get(DATASOURCE_SCHEMA_NUMPARTS) if (numSchemaParts.isDefined) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 8e5fc88aad448..edbde5d10b47c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -64,7 +64,9 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log val dataSource = DataSource( sparkSession, - userSpecifiedSchema = Some(table.schema), + // In older version(prior to 2.1) of Spark, the table schema can be empty and should be + // inferred at runtime. We should still support it. 
+ userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema), partitionColumns = table.partitionColumnNames, bucketSpec = table.bucketSpec, className = table.provider.get, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index c50f92e783c88..4ab1a54edc46d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1371,4 +1371,26 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } } } + + test("SPARK-18464: support old table which doesn't store schema in table properties") { + withTable("old") { + withTempPath { path => + Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath) + val tableDesc = CatalogTable( + identifier = TableIdentifier("old", Some("default")), + tableType = CatalogTableType.EXTERNAL, + storage = CatalogStorageFormat.empty.copy( + properties = Map("path" -> path.getAbsolutePath) + ), + schema = new StructType(), + properties = Map( + HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet")) + hiveClient.createTable(tableDesc, ignoreIfExists = false) + + checkAnswer(spark.table("old"), Row(1, "a")) + + checkAnswer(sql("DESC old"), Row("i", "int", null) :: Row("j", "string", null) :: Nil) + } + } + } } From 2ee4fc8891be53b2fae43faa5cd09ade32173bba Mon Sep 17 00:00:00 2001 From: Weiqing Yang Date: Thu, 17 Nov 2016 11:13:22 +0000 Subject: [PATCH 0137/1204] [YARN][DOC] Remove non-Yarn specific configurations from running-on-yarn.md ## What changes were proposed in this pull request? Remove `spark.driver.memory`, `spark.executor.memory`, `spark.driver.cores`, and `spark.executor.cores` from `running-on-yarn.md` as they are not Yarn-specific, and they are also defined in`configuration.md`. ## How was this patch tested? Build passed & Manually check. Author: Weiqing Yang Closes #15869 from weiqingy/yarnDoc. (cherry picked from commit a3cac7bd86a6fe8e9b42da1bf580aaeb59378304) Signed-off-by: Sean Owen --- docs/running-on-yarn.md | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index fe0221ce7c5b6..4d1fafc07b8fc 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -117,28 +117,6 @@ To use a custom metrics.properties for the application master and executors, upd Use lower-case suffixes, e.g. k, m, g, t, and p, for kibi-, mebi-, gibi-, tebi-, and pebibytes, respectively. - - spark.driver.memory - 1g - - Amount of memory to use for the driver process, i.e. where SparkContext is initialized. - (e.g. 1g, 2g). - -
Note: In client mode, this config must not be set through the SparkConf - directly in your application, because the driver JVM has already started at that point. - Instead, please set this through the --driver-memory command line option - or in your default properties file. - - - - spark.driver.cores - 1 - - Number of cores used by the driver in YARN cluster mode. - Since the driver is run in the same JVM as the YARN Application Master in cluster mode, this also controls the cores used by the YARN Application Master. - In client mode, use spark.yarn.am.cores to control the number of cores used by the YARN Application Master instead. - - spark.yarn.am.cores 1 @@ -233,13 +211,6 @@ To use a custom metrics.properties for the application master and executors, upd Comma-separated list of jars to be placed in the working directory of each executor. - - spark.executor.cores - 1 in YARN mode, all the available cores on the worker in standalone mode. - - The number of cores to use on each executor. For YARN and standalone mode only. - - spark.executor.instances 2 @@ -247,13 +218,6 @@ To use a custom metrics.properties for the application master and executors, upd The number of executors for static allocation. With spark.dynamicAllocation.enabled, the initial set of executors will be at least this large. - - spark.executor.memory - 1g - - Amount of memory to use per executor process (e.g. 2g, 8g). - - spark.yarn.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 From 4fcecb4cf081fba0345f1939420ca1d9f6de720c Mon Sep 17 00:00:00 2001 From: anabranch Date: Thu, 17 Nov 2016 11:34:55 +0000 Subject: [PATCH 0138/1204] [SPARK-18365][DOCS] Improve Sample Method Documentation ## What changes were proposed in this pull request? I found the documentation for the sample method to be confusing, this adds more clarification across all languages. - [x] Scala - [x] Python - [x] R - [x] RDD Scala - [ ] RDD Python with SEED - [X] RDD Java - [x] RDD Java with SEED - [x] RDD Python ## How was this patch tested? NA Please review https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark before opening a pull request. Author: anabranch Author: Bill Chambers Closes #15815 from anabranch/SPARK-18365. (cherry picked from commit 49b6f456aca350e9e2c170782aa5cc75e7822680) Signed-off-by: Sean Owen --- R/pkg/R/DataFrame.R | 4 +++- .../main/scala/org/apache/spark/api/java/JavaRDD.scala | 8 ++++++-- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 3 +++ python/pyspark/rdd.py | 5 +++++ python/pyspark/sql/dataframe.py | 5 +++++ .../src/main/scala/org/apache/spark/sql/Dataset.scala | 10 ++++++++-- 6 files changed, 30 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 1cf9b38ea6483..4e3d97bb3ad07 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -936,7 +936,9 @@ setMethod("unique", #' Sample #' -#' Return a sampled subset of this SparkDataFrame using a random seed. +#' Return a sampled subset of this SparkDataFrame using a random seed. +#' Note: this is not guaranteed to provide exactly the fraction specified +#' of the total count of of the given SparkDataFrame. 
#' #' @param x A SparkDataFrame #' @param withReplacement Sampling with replacement or not diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index 20d6c9341bf7a..d67cff64e6e46 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -98,7 +98,9 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) def repartition(numPartitions: Int): JavaRDD[T] = rdd.repartition(numPartitions) /** - * Return a sampled subset of this RDD. + * Return a sampled subset of this RDD with a random seed. + * Note: this is NOT guaranteed to provide exactly the fraction of the count + * of the given [[RDD]]. * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size @@ -109,7 +111,9 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) sample(withReplacement, fraction, Utils.random.nextLong) /** - * Return a sampled subset of this RDD. + * Return a sampled subset of this RDD, with a user-supplied seed. + * Note: this is NOT guaranteed to provide exactly the fraction of the count + * of the given [[RDD]]. * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index e018af35cb18d..cded899db1f5c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -466,6 +466,9 @@ abstract class RDD[T: ClassTag]( /** * Return a sampled subset of this RDD. * + * Note: this is NOT guaranteed to provide exactly the fraction of the count + * of the given [[RDD]]. + * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 2de2c2fd1a60b..a163ceafe9d3b 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -386,6 +386,11 @@ def sample(self, withReplacement, fraction, seed=None): with replacement: expected number of times each element is chosen; fraction must be >= 0 :param seed: seed for the random number generator + .. note:: + + This is not guaranteed to provide exactly the fraction specified of the total count + of the given :class:`DataFrame`. + >>> rdd = sc.parallelize(range(100), 4) >>> 6 <= rdd.sample(False, 0.1, 81).count() <= 14 True diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 29710acf54c4f..38998900837cf 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -549,6 +549,11 @@ def distinct(self): def sample(self, withReplacement, fraction, seed=None): """Returns a sampled subset of this :class:`DataFrame`. + .. note:: + + This is not guaranteed to provide exactly the fraction specified of the total count + of the given :class:`DataFrame`. 
+ >>> df.sample(False, 0.5, 42).count() 2 """ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index af30683cc01c4..3761773698df3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1646,7 +1646,10 @@ class Dataset[T] private[sql]( } /** - * Returns a new Dataset by sampling a fraction of rows. + * Returns a new [[Dataset]] by sampling a fraction of rows, using a user-supplied seed. + * + * Note: this is NOT guaranteed to provide exactly the fraction of the count + * of the given [[Dataset]]. * * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. @@ -1665,7 +1668,10 @@ class Dataset[T] private[sql]( } /** - * Returns a new Dataset by sampling a fraction of rows, using a random seed. + * Returns a new [[Dataset]] by sampling a fraction of rows, using a random seed. + * + * Note: this is NOT guaranteed to provide exactly the fraction of the total count + * of the given [[Dataset]]. * * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. From 42777b1b3c10d3945494e27f1dedd43f2f836361 Mon Sep 17 00:00:00 2001 From: VinceShieh Date: Thu, 17 Nov 2016 13:37:42 +0000 Subject: [PATCH 0139/1204] [SPARK-17462][MLLIB]use VersionUtils to parse Spark version strings ## What changes were proposed in this pull request? Several places in MLlib use custom regexes or other approaches to parse Spark versions. Those should be fixed to use the VersionUtils. This PR replaces custom regexes with VersionUtils to get Spark version numbers. ## How was this patch tested? Existing tests. Signed-off-by: VinceShieh vincent.xieintel.com Author: VinceShieh Closes #15055 from VinceShieh/SPARK-17462. 
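The version parsing that this change centralizes can be sketched in isolation as follows; this is a simplified, standalone approximation, not the actual `org.apache.spark.util.VersionUtils` implementation whose `majorVersion` helper the diff below switches to.

```scala
// A simplified, standalone sketch of major-version parsing. The real helper
// used by the patch is org.apache.spark.util.VersionUtils.majorVersion.
object VersionSketch {
  private val versionRegex = """^(\d+)\.(\d+)(\..*)?$""".r

  /** Extract the major version number, e.g. "2.1.0" -> 2. */
  def majorVersion(sparkVersion: String): Int = sparkVersion match {
    case versionRegex(major, _, _) => major.toInt
    case _ =>
      throw new IllegalArgumentException(s"Cannot parse Spark version: $sparkVersion")
  }

  def main(args: Array[String]): Unit = {
    println(majorVersion("2.1.0")) // 2
    println(majorVersion("1.6.3")) // 1
    // The model loaders below use the major version to decide whether a
    // persisted KMeans or PCA model was written by Spark 2.x or 1.x.
  }
}
```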
(cherry picked from commit de77c67750dc868d75d6af173c3820b75a9fe4b7) Signed-off-by: Sean Owen --- .../main/scala/org/apache/spark/ml/clustering/KMeans.scala | 6 ++---- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index a0d481b294ac7..26505b4cc1501 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -33,6 +33,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.util.VersionUtils.majorVersion /** * Common params for KMeans and KMeansModel @@ -232,10 +233,7 @@ object KMeansModel extends MLReadable[KMeansModel] { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) val dataPath = new Path(path, "data").toString - val versionRegex = "([0-9]+)\\.(.+)".r - val versionRegex(major, _) = metadata.sparkVersion - - val clusterCenters = if (major.toInt >= 2) { + val clusterCenters = if (majorVersion(metadata.sparkVersion) >= 2) { val data: Dataset[Data] = sparkSession.read.parquet(dataPath).as[Data] data.collect().sortBy(_.clusterIdx).map(_.clusterCenter).map(OldVectors.fromML) } else { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 444006fe1edb6..1e49352b8517e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -34,6 +34,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.util.VersionUtils.majorVersion /** * Params for [[PCA]] and [[PCAModel]]. @@ -204,11 +205,8 @@ object PCAModel extends MLReadable[PCAModel] { override def load(path: String): PCAModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) - val versionRegex = "([0-9]+)\\.(.+)".r - val versionRegex(major, _) = metadata.sparkVersion - val dataPath = new Path(path, "data").toString - val model = if (major.toInt >= 2) { + val model = if (majorVersion(metadata.sparkVersion) >= 2) { val Row(pc: DenseMatrix, explainedVariance: DenseVector) = sparkSession.read.parquet(dataPath) .select("pc", "explainedVariance") From 536a2159393c82d414cc46797c8bfd958f453d33 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Thu, 17 Nov 2016 13:40:16 +0000 Subject: [PATCH 0140/1204] [SPARK-18480][DOCS] Fix wrong links for ML guide docs ## What changes were proposed in this pull request? 1, There are two `[Graph.partitionBy]` in `graphx-programming-guide.md`, the first one had no effert. 2, `DataFrame`, `Transformer`, `Pipeline` and `Parameter` in `ml-pipeline.md` were linked to `ml-guide.html` by mistake. 3, `PythonMLLibAPI` in `mllib-linear-methods.md` was not accessable, because class `PythonMLLibAPI` is private. 4, Other link updates. ## How was this patch tested? manual tests Author: Zheng RuiFeng Closes #15912 from zhengruifeng/md_fix. 
(cherry picked from commit cdaf4ce9fe58c4606be8aa2a5c3756d30545c850) Signed-off-by: Sean Owen --- docs/graphx-programming-guide.md | 1 - docs/ml-classification-regression.md | 4 ++-- docs/ml-features.md | 2 +- docs/ml-pipeline.md | 12 ++++++------ docs/mllib-linear-methods.md | 4 +--- .../main/scala/org/apache/spark/ml/feature/LSH.scala | 2 +- .../spark/ml/tree/impl/GradientBoostedTrees.scala | 8 ++++---- .../org/apache/spark/ml/tree/impl/RandomForest.scala | 8 ++++---- 8 files changed, 19 insertions(+), 22 deletions(-) diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md index 1097cf1211c1f..e271b28fb4f28 100644 --- a/docs/graphx-programming-guide.md +++ b/docs/graphx-programming-guide.md @@ -36,7 +36,6 @@ description: GraphX graph processing library guide for Spark SPARK_VERSION_SHORT [Graph.fromEdgeTuples]: api/scala/index.html#org.apache.spark.graphx.Graph$@fromEdgeTuples[VD](RDD[(VertexId,VertexId)],VD,Option[PartitionStrategy])(ClassTag[VD]):Graph[VD,Int] [Graph.fromEdges]: api/scala/index.html#org.apache.spark.graphx.Graph$@fromEdges[VD,ED](RDD[Edge[ED]],VD)(ClassTag[VD],ClassTag[ED]):Graph[VD,ED] [PartitionStrategy]: api/scala/index.html#org.apache.spark.graphx.PartitionStrategy -[Graph.partitionBy]: api/scala/index.html#org.apache.spark.graphx.Graph$@partitionBy(partitionStrategy:org.apache.spark.graphx.PartitionStrategy):org.apache.spark.graphx.Graph[VD,ED] [PageRank]: api/scala/index.html#org.apache.spark.graphx.lib.PageRank$ [ConnectedComponents]: api/scala/index.html#org.apache.spark.graphx.lib.ConnectedComponents$ [TriangleCount]: api/scala/index.html#org.apache.spark.graphx.lib.TriangleCount$ diff --git a/docs/ml-classification-regression.md b/docs/ml-classification-regression.md index cb2ccbf4fe157..c72c01fcff830 100644 --- a/docs/ml-classification-regression.md +++ b/docs/ml-classification-regression.md @@ -984,7 +984,7 @@ Random forests combine many decision trees in order to reduce the risk of overfi The `spark.ml` implementation supports random forests for binary and multiclass classification and for regression, using both continuous and categorical features. -For more information on the algorithm itself, please see the [`spark.mllib` documentation on random forests](mllib-ensembles.html). +For more information on the algorithm itself, please see the [`spark.mllib` documentation on random forests](mllib-ensembles.html#random-forests). ### Inputs and Outputs @@ -1065,7 +1065,7 @@ GBTs iteratively train decision trees in order to minimize a loss function. The `spark.ml` implementation supports GBTs for binary classification and for regression, using both continuous and categorical features. -For more information on the algorithm itself, please see the [`spark.mllib` documentation on GBTs](mllib-ensembles.html). +For more information on the algorithm itself, please see the [`spark.mllib` documentation on GBTs](mllib-ensembles.html#gradient-boosted-trees-gbts). ### Inputs and Outputs diff --git a/docs/ml-features.md b/docs/ml-features.md index 903177210d820..45724a3716e74 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -694,7 +694,7 @@ for more details on the API. `VectorIndexer` helps index categorical features in datasets of `Vector`s. It can both automatically decide which features are categorical and convert original values to category indices. Specifically, it does the following: -1. Take an input column of type [Vector](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and a parameter `maxCategories`. +1. 
Take an input column of type [Vector](api/scala/index.html#org.apache.spark.ml.linalg.Vector) and a parameter `maxCategories`. 2. Decide which features should be categorical based on the number of distinct values, where features with at most `maxCategories` are declared categorical. 3. Compute 0-based category indices for each categorical feature. 4. Index categorical features and transform original feature values to indices. diff --git a/docs/ml-pipeline.md b/docs/ml-pipeline.md index b4d6be94f5eb0..0384513ab7014 100644 --- a/docs/ml-pipeline.md +++ b/docs/ml-pipeline.md @@ -38,26 +38,26 @@ algorithms into a single pipeline, or workflow. This section covers the key concepts introduced by the Pipelines API, where the pipeline concept is mostly inspired by the [scikit-learn](http://scikit-learn.org/) project. -* **[`DataFrame`](ml-guide.html#dataframe)**: This ML API uses `DataFrame` from Spark SQL as an ML +* **[`DataFrame`](ml-pipeline.html#dataframe)**: This ML API uses `DataFrame` from Spark SQL as an ML dataset, which can hold a variety of data types. E.g., a `DataFrame` could have different columns storing text, feature vectors, true labels, and predictions. -* **[`Transformer`](ml-guide.html#transformers)**: A `Transformer` is an algorithm which can transform one `DataFrame` into another `DataFrame`. +* **[`Transformer`](ml-pipeline.html#transformers)**: A `Transformer` is an algorithm which can transform one `DataFrame` into another `DataFrame`. E.g., an ML model is a `Transformer` which transforms a `DataFrame` with features into a `DataFrame` with predictions. -* **[`Estimator`](ml-guide.html#estimators)**: An `Estimator` is an algorithm which can be fit on a `DataFrame` to produce a `Transformer`. +* **[`Estimator`](ml-pipeline.html#estimators)**: An `Estimator` is an algorithm which can be fit on a `DataFrame` to produce a `Transformer`. E.g., a learning algorithm is an `Estimator` which trains on a `DataFrame` and produces a model. -* **[`Pipeline`](ml-guide.html#pipeline)**: A `Pipeline` chains multiple `Transformer`s and `Estimator`s together to specify an ML workflow. +* **[`Pipeline`](ml-pipeline.html#pipeline)**: A `Pipeline` chains multiple `Transformer`s and `Estimator`s together to specify an ML workflow. -* **[`Parameter`](ml-guide.html#parameters)**: All `Transformer`s and `Estimator`s now share a common API for specifying parameters. +* **[`Parameter`](ml-pipeline.html#parameters)**: All `Transformer`s and `Estimator`s now share a common API for specifying parameters. ## DataFrame Machine learning can be applied to a wide variety of data types, such as vectors, text, images, and structured data. This API adopts the `DataFrame` from Spark SQL in order to support a variety of data types. -`DataFrame` supports many basic and structured types; see the [Spark SQL datatype reference](sql-programming-guide.html#spark-sql-datatype-reference) for a list of supported types. +`DataFrame` supports many basic and structured types; see the [Spark SQL datatype reference](sql-programming-guide.html#data-types) for a list of supported types. In addition to the types listed in the Spark SQL guide, `DataFrame` can use ML [`Vector`](mllib-data-types.html#local-vector) types. A `DataFrame` can be created either implicitly or explicitly from a regular `RDD`. See the code examples below and the [Spark SQL programming guide](sql-programming-guide.html) for examples. 
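To ground the `DataFrame` / `Transformer` / `Estimator` / `Pipeline` terminology in the pipeline guide above, here is a minimal sketch using the `spark.ml` Scala API; the toy `training` dataset and column names are illustrative.

```scala
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("pipeline-sketch").getOrCreate()
import spark.implicits._

// A toy training DataFrame with id, text, and label columns.
val training = Seq(
  (0L, "a b c d e spark", 1.0),
  (1L, "b d", 0.0),
  (2L, "spark f g h", 1.0),
  (3L, "hadoop mapreduce", 0.0)
).toDF("id", "text", "label")

// Two Transformers and an Estimator chained into a Pipeline.
val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val hashingTF = new HashingTF().setInputCol("words").setOutputCol("features")
val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.001)
val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

// Fitting the Pipeline (an Estimator) yields a PipelineModel (a Transformer).
val model = pipeline.fit(training)
model.transform(training).select("id", "text", "prediction").show()
```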
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index 816bdf1317000..3085539b40e61 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -139,7 +139,7 @@ and logistic regression. Linear SVMs supports only binary classification, while logistic regression supports both binary and multiclass classification problems. For both methods, `spark.mllib` supports L1 and L2 regularized variants. -The training data set is represented by an RDD of [LabeledPoint](mllib-data-types.html) in MLlib, +The training data set is represented by an RDD of [LabeledPoint](mllib-data-types.html#labeled-point) in MLlib, where labels are class indices starting from zero: $0, 1, 2, \ldots$. ### Linear Support Vector Machines (SVMs) @@ -491,5 +491,3 @@ Algorithms are all implemented in Scala: * [RidgeRegressionWithSGD](api/scala/index.html#org.apache.spark.mllib.regression.RidgeRegressionWithSGD) * [LassoWithSGD](api/scala/index.html#org.apache.spark.mllib.regression.LassoWithSGD) -Python calls the Scala implementation via -[PythonMLLibAPI](api/scala/index.html#org.apache.spark.mllib.api.python.PythonMLLibAPI). diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index 333a8c364a884..eb117c40eea3a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -40,7 +40,7 @@ private[ml] trait LSHParams extends HasInputCol with HasOutputCol { * @group param */ final val outputDim: IntParam = new IntParam(this, "outputDim", "output dimension, where" + - "increasing dimensionality lowers the false negative rate, and decreasing dimensionality" + + " increasing dimensionality lowers the false negative rate, and decreasing dimensionality" + " improves the running performance", ParamValidators.gt(0)) /** @group getParam */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala index 7bef899a633d9..ede0a060eef95 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala @@ -34,7 +34,7 @@ private[spark] object GradientBoostedTrees extends Logging { /** * Method to train a gradient boosting model - * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. + * @param input Training dataset: RDD of [[LabeledPoint]]. * @param seed Random seed. * @return tuple of ensemble models and weights: * (array of decision tree models, array of model weights) @@ -59,7 +59,7 @@ private[spark] object GradientBoostedTrees extends Logging { /** * Method to validate a gradient boosting model - * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. + * @param input Training dataset: RDD of [[LabeledPoint]]. * @param validationInput Validation dataset. * This dataset should be different from the training dataset, * but it should follow the same distribution. @@ -162,7 +162,7 @@ private[spark] object GradientBoostedTrees extends Logging { * Method to calculate error of the base learner for the gradient boosting calculation. * Note: This method is not used by the gradient boosting algorithm but is useful for debugging * purposes. - * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. 
+ * @param data Training dataset: RDD of [[LabeledPoint]]. * @param trees Boosted Decision Tree models * @param treeWeights Learning rates at each boosting iteration. * @param loss evaluation metric. @@ -184,7 +184,7 @@ private[spark] object GradientBoostedTrees extends Logging { /** * Method to compute error or loss for every iteration of gradient boosting. * - * @param data RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] + * @param data RDD of [[LabeledPoint]] * @param trees Boosted Decision Tree models * @param treeWeights Learning rates at each boosting iteration. * @param loss evaluation metric. diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala index b504f411d256d..8ae5ca3c84b0e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala @@ -82,7 +82,7 @@ private[spark] object RandomForest extends Logging { /** * Train a random forest. * - * @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] + * @param input Training data: RDD of [[LabeledPoint]] * @return an unweighted set of trees */ def run( @@ -343,7 +343,7 @@ private[spark] object RandomForest extends Logging { /** * Given a group of nodes, this finds the best split for each node. * - * @param input Training data: RDD of [[org.apache.spark.ml.tree.impl.TreePoint]] + * @param input Training data: RDD of [[TreePoint]] * @param metadata Learning and dataset metadata * @param topNodesForGroup For each tree in group, tree index -> root node. * Used for matching instances with nodes. @@ -854,10 +854,10 @@ private[spark] object RandomForest extends Logging { * and for multiclass classification with a high-arity feature, * there is one bin per category. * - * @param input Training data: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]] + * @param input Training data: RDD of [[LabeledPoint]] * @param metadata Learning and dataset metadata * @param seed random seed - * @return Splits, an Array of [[org.apache.spark.mllib.tree.model.Split]] + * @return Splits, an Array of [[Split]] * of size (numFeatures, numSplits) */ protected[tree] def findSplits( From 978798880c0b1e6a15e8a342847e1ff4d83a5ac0 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 17 Nov 2016 17:04:19 +0000 Subject: [PATCH 0141/1204] [SPARK-18490][SQL] duplication nodename extrainfo for ShuffleExchange ## What changes were proposed in this pull request? In ShuffleExchange, the nodename's extraInfo are the same when exchangeCoordinator.isEstimated is true or false. Merge the two situation in the PR. Author: root Closes #15920 from windpiger/DupNodeNameShuffleExchange. 
(cherry picked from commit b0aa1aa1af6c513a6a881eaea96abdd2b480ef98) Signed-off-by: Sean Owen --- .../apache/spark/sql/execution/exchange/ShuffleExchange.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchange.scala index 7a4a251370706..125a4930c6528 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchange.scala @@ -45,9 +45,7 @@ case class ShuffleExchange( override def nodeName: String = { val extraInfo = coordinator match { - case Some(exchangeCoordinator) if exchangeCoordinator.isEstimated => - s"(coordinator id: ${System.identityHashCode(coordinator)})" - case Some(exchangeCoordinator) if !exchangeCoordinator.isEstimated => + case Some(exchangeCoordinator) => s"(coordinator id: ${System.identityHashCode(coordinator)})" case None => "" } From fc466be4fd8def06880f59d50e5567c22cc53d6a Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 17 Nov 2016 17:31:12 -0800 Subject: [PATCH 0142/1204] [SPARK-18360][SQL] default table path of tables in default database should depend on the location of default database ## What changes were proposed in this pull request? The current semantic of the warehouse config: 1. it's a static config, which means you can't change it once your spark application is launched. 2. Once a database is created, its location won't change even the warehouse path config is changed. 3. default database is a special case, although its location is fixed, but the locations of tables created in it are not. If a Spark app starts with warehouse path B(while the location of default database is A), then users create a table `tbl` in default database, its location will be `B/tbl` instead of `A/tbl`. If uses change the warehouse path config to C, and create another table `tbl2`, its location will still be `B/tbl2` instead of `C/tbl2`. rule 3 doesn't make sense and I think we made it by mistake, not intentionally. Data source tables don't follow rule 3 and treat default database like normal ones. This PR fixes hive serde tables to make it consistent with data source tables. ## How was this patch tested? HiveSparkSubmitSuite Author: Wenchen Fan Closes #15812 from cloud-fan/default-db. (cherry picked from commit ce13c2672318242748f7520ed4ce6bcfad4fb428) Signed-off-by: Yin Huai --- .../spark/sql/hive/HiveExternalCatalog.scala | 237 ++++++++++-------- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 76 +++++- 2 files changed, 190 insertions(+), 123 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 843305883abc8..cacffcf33c263 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -197,136 +197,151 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat if (tableDefinition.tableType == VIEW) { client.createTable(tableDefinition, ignoreIfExists) - } else if (tableDefinition.provider.get == DDLUtils.HIVE_PROVIDER) { - // Here we follow data source tables and put table metadata like provider, schema, etc. 
in - // table properties, so that we can work around the Hive metastore issue about not case - // preserving and make Hive serde table support mixed-case column names. - val tableWithDataSourceProps = tableDefinition.copy( - properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition)) - client.createTable(tableWithDataSourceProps, ignoreIfExists) } else { - // To work around some hive metastore issues, e.g. not case-preserving, bad decimal type - // support, no column nullability, etc., we should do some extra works before saving table - // metadata into Hive metastore: - // 1. Put table metadata like provider, schema, etc. in table properties. - // 2. Check if this table is hive compatible. - // 2.1 If it's not hive compatible, set location URI, schema, partition columns and bucket - // spec to empty and save table metadata to Hive. - // 2.2 If it's hive compatible, set serde information in table metadata and try to save - // it to Hive. If it fails, treat it as not hive compatible and go back to 2.1 - val tableProperties = tableMetaToTableProps(tableDefinition) - // Ideally we should not create a managed table with location, but Hive serde table can // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have // to create the table directory and write out data before we create this table, to avoid // exposing a partial written table. val needDefaultTableLocation = tableDefinition.tableType == MANAGED && tableDefinition.storage.locationUri.isEmpty + val tableLocation = if (needDefaultTableLocation) { Some(defaultTablePath(tableDefinition.identifier)) } else { tableDefinition.storage.locationUri } - // Ideally we should also put `locationUri` in table properties like provider, schema, etc. - // However, in older version of Spark we already store table location in storage properties - // with key "path". Here we keep this behaviour for backward compatibility. - val storagePropsWithLocation = tableDefinition.storage.properties ++ - tableLocation.map("path" -> _) - - // converts the table metadata to Spark SQL specific format, i.e. set data schema, names and - // bucket specification to empty. Note that partition columns are retained, so that we can - // call partition-related Hive API later. - def newSparkSQLSpecificMetastoreTable(): CatalogTable = { - tableDefinition.copy( - // Hive only allows directory paths as location URIs while Spark SQL data source tables - // also allow file paths. For non-hive-compatible format, we should not set location URI - // to avoid hive metastore to throw exception. - storage = tableDefinition.storage.copy( - locationUri = None, - properties = storagePropsWithLocation), - schema = tableDefinition.partitionSchema, - bucketSpec = None, - properties = tableDefinition.properties ++ tableProperties) + + if (tableDefinition.provider.get == DDLUtils.HIVE_PROVIDER) { + val tableWithDataSourceProps = tableDefinition.copy( + // We can't leave `locationUri` empty and count on Hive metastore to set a default table + // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default + // table location for tables in default database, while we expect to use the location of + // default database. + storage = tableDefinition.storage.copy(locationUri = tableLocation), + // Here we follow data source tables and put table metadata like provider, schema, etc. 
in + // table properties, so that we can work around the Hive metastore issue about not case + // preserving and make Hive serde table support mixed-case column names. + properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition)) + client.createTable(tableWithDataSourceProps, ignoreIfExists) + } else { + createDataSourceTable( + tableDefinition.withNewStorage(locationUri = tableLocation), + ignoreIfExists) } + } + } - // converts the table metadata to Hive compatible format, i.e. set the serde information. - def newHiveCompatibleMetastoreTable(serde: HiveSerDe): CatalogTable = { - val location = if (tableDefinition.tableType == EXTERNAL) { - // When we hit this branch, we are saving an external data source table with hive - // compatible format, which means the data source is file-based and must have a `path`. - require(tableDefinition.storage.locationUri.isDefined, - "External file-based data source table must have a `path` entry in storage properties.") - Some(new Path(tableDefinition.location).toUri.toString) - } else { - None - } + private def createDataSourceTable(table: CatalogTable, ignoreIfExists: Boolean): Unit = { + // To work around some hive metastore issues, e.g. not case-preserving, bad decimal type + // support, no column nullability, etc., we should do some extra works before saving table + // metadata into Hive metastore: + // 1. Put table metadata like provider, schema, etc. in table properties. + // 2. Check if this table is hive compatible. + // 2.1 If it's not hive compatible, set location URI, schema, partition columns and bucket + // spec to empty and save table metadata to Hive. + // 2.2 If it's hive compatible, set serde information in table metadata and try to save + // it to Hive. If it fails, treat it as not hive compatible and go back to 2.1 + val tableProperties = tableMetaToTableProps(table) + + // Ideally we should also put `locationUri` in table properties like provider, schema, etc. + // However, in older version of Spark we already store table location in storage properties + // with key "path". Here we keep this behaviour for backward compatibility. + val storagePropsWithLocation = table.storage.properties ++ + table.storage.locationUri.map("path" -> _) + + // converts the table metadata to Spark SQL specific format, i.e. set data schema, names and + // bucket specification to empty. Note that partition columns are retained, so that we can + // call partition-related Hive API later. + def newSparkSQLSpecificMetastoreTable(): CatalogTable = { + table.copy( + // Hive only allows directory paths as location URIs while Spark SQL data source tables + // also allow file paths. For non-hive-compatible format, we should not set location URI + // to avoid hive metastore to throw exception. + storage = table.storage.copy( + locationUri = None, + properties = storagePropsWithLocation), + schema = table.partitionSchema, + bucketSpec = None, + properties = table.properties ++ tableProperties) + } - tableDefinition.copy( - storage = tableDefinition.storage.copy( - locationUri = location, - inputFormat = serde.inputFormat, - outputFormat = serde.outputFormat, - serde = serde.serde, - properties = storagePropsWithLocation - ), - properties = tableDefinition.properties ++ tableProperties) + // converts the table metadata to Hive compatible format, i.e. set the serde information. 
+ def newHiveCompatibleMetastoreTable(serde: HiveSerDe): CatalogTable = { + val location = if (table.tableType == EXTERNAL) { + // When we hit this branch, we are saving an external data source table with hive + // compatible format, which means the data source is file-based and must have a `path`. + require(table.storage.locationUri.isDefined, + "External file-based data source table must have a `path` entry in storage properties.") + Some(new Path(table.location).toUri.toString) + } else { + None } - val qualifiedTableName = tableDefinition.identifier.quotedString - val maybeSerde = HiveSerDe.sourceToSerDe(tableDefinition.provider.get) - val skipHiveMetadata = tableDefinition.storage.properties - .getOrElse("skipHiveMetadata", "false").toBoolean - - val (hiveCompatibleTable, logMessage) = maybeSerde match { - case _ if skipHiveMetadata => - val message = - s"Persisting data source table $qualifiedTableName into Hive metastore in" + - "Spark SQL specific format, which is NOT compatible with Hive." - (None, message) - - // our bucketing is un-compatible with hive(different hash function) - case _ if tableDefinition.bucketSpec.nonEmpty => - val message = - s"Persisting bucketed data source table $qualifiedTableName into " + - "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. " - (None, message) - - case Some(serde) => - val message = - s"Persisting file based data source table $qualifiedTableName into " + - s"Hive metastore in Hive compatible format." - (Some(newHiveCompatibleMetastoreTable(serde)), message) - - case _ => - val provider = tableDefinition.provider.get - val message = - s"Couldn't find corresponding Hive SerDe for data source provider $provider. " + - s"Persisting data source table $qualifiedTableName into Hive metastore in " + - s"Spark SQL specific format, which is NOT compatible with Hive." - (None, message) - } + table.copy( + storage = table.storage.copy( + locationUri = location, + inputFormat = serde.inputFormat, + outputFormat = serde.outputFormat, + serde = serde.serde, + properties = storagePropsWithLocation + ), + properties = table.properties ++ tableProperties) + } - (hiveCompatibleTable, logMessage) match { - case (Some(table), message) => - // We first try to save the metadata of the table in a Hive compatible way. - // If Hive throws an error, we fall back to save its metadata in the Spark SQL - // specific way. - try { - logInfo(message) - saveTableIntoHive(table, ignoreIfExists) - } catch { - case NonFatal(e) => - val warningMessage = - s"Could not persist ${tableDefinition.identifier.quotedString} in a Hive " + - "compatible way. Persisting it into Hive metastore in Spark SQL specific format." - logWarning(warningMessage, e) - saveTableIntoHive(newSparkSQLSpecificMetastoreTable(), ignoreIfExists) - } + val qualifiedTableName = table.identifier.quotedString + val maybeSerde = HiveSerDe.sourceToSerDe(table.provider.get) + val skipHiveMetadata = table.storage.properties + .getOrElse("skipHiveMetadata", "false").toBoolean + + val (hiveCompatibleTable, logMessage) = maybeSerde match { + case _ if skipHiveMetadata => + val message = + s"Persisting data source table $qualifiedTableName into Hive metastore in" + + "Spark SQL specific format, which is NOT compatible with Hive." 
+ (None, message) + + // our bucketing is un-compatible with hive(different hash function) + case _ if table.bucketSpec.nonEmpty => + val message = + s"Persisting bucketed data source table $qualifiedTableName into " + + "Hive metastore in Spark SQL specific format, which is NOT compatible with Hive. " + (None, message) + + case Some(serde) => + val message = + s"Persisting file based data source table $qualifiedTableName into " + + s"Hive metastore in Hive compatible format." + (Some(newHiveCompatibleMetastoreTable(serde)), message) + + case _ => + val provider = table.provider.get + val message = + s"Couldn't find corresponding Hive SerDe for data source provider $provider. " + + s"Persisting data source table $qualifiedTableName into Hive metastore in " + + s"Spark SQL specific format, which is NOT compatible with Hive." + (None, message) + } - case (None, message) => - logWarning(message) - saveTableIntoHive(newSparkSQLSpecificMetastoreTable(), ignoreIfExists) - } + (hiveCompatibleTable, logMessage) match { + case (Some(table), message) => + // We first try to save the metadata of the table in a Hive compatible way. + // If Hive throws an error, we fall back to save its metadata in the Spark SQL + // specific way. + try { + logInfo(message) + saveTableIntoHive(table, ignoreIfExists) + } catch { + case NonFatal(e) => + val warningMessage = + s"Could not persist ${table.identifier.quotedString} in a Hive " + + "compatible way. Persisting it into Hive metastore in Spark SQL specific format." + logWarning(warningMessage, e) + saveTableIntoHive(newSparkSQLSpecificMetastoreTable(), ignoreIfExists) + } + + case (None, message) => + logWarning(message) + saveTableIntoHive(newSparkSQLSpecificMetastoreTable(), ignoreIfExists) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index fbd705172cae6..a670560c5969d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -24,6 +24,7 @@ import java.util.Date import scala.collection.mutable.ArrayBuffer import scala.tools.nsc.Properties +import org.apache.hadoop.fs.Path import org.scalatest.{BeforeAndAfterEach, Matchers} import org.scalatest.concurrent.Timeouts import org.scalatest.exceptions.TestFailedDueToTimeoutException @@ -33,11 +34,12 @@ import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.sql.{QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource, JarResource} +import org.apache.spark.sql.catalyst.catalog._ +import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer -import org.apache.spark.sql.types.DecimalType +import org.apache.spark.sql.types.{DecimalType, StructType} import org.apache.spark.util.{ResetSystemProperties, Utils} /** @@ -295,6 +297,20 @@ class HiveSparkSubmitSuite runSparkSubmit(args) } + test("SPARK-18360: default table path of tables in default database should depend on the " + + "location of default database") { + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val args = Seq( + "--class", 
SPARK_18360.getClass.getName.stripSuffix("$"), + "--name", "SPARK-18360", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--driver-java-options", "-Dderby.system.durability=test", + unusedJar.toString) + runSparkSubmit(args) + } + // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. // This is copied from org.apache.spark.deploy.SparkSubmitSuite private def runSparkSubmit(args: Seq[String]): Unit = { @@ -397,11 +413,7 @@ object SetWarehouseLocationTest extends Logging { def main(args: Array[String]): Unit = { Utils.configTestLog4j("INFO") - val sparkConf = new SparkConf(loadDefaults = true) - val builder = SparkSession.builder() - .config(sparkConf) - .config("spark.ui.enabled", "false") - .enableHiveSupport() + val sparkConf = new SparkConf(loadDefaults = true).set("spark.ui.enabled", "false") val providedExpectedWarehouseLocation = sparkConf.getOption("spark.sql.test.expectedWarehouseDir") @@ -410,7 +422,7 @@ object SetWarehouseLocationTest extends Logging { // If spark.sql.test.expectedWarehouseDir is set, the warehouse dir is set // through spark-summit. So, neither spark.sql.warehouse.dir nor // hive.metastore.warehouse.dir is set at here. - (builder.getOrCreate(), warehouseDir) + (new TestHiveContext(new SparkContext(sparkConf)).sparkSession, warehouseDir) case None => val warehouseLocation = Utils.createTempDir() warehouseLocation.delete() @@ -420,10 +432,10 @@ object SetWarehouseLocationTest extends Logging { // spark.sql.warehouse.dir and hive.metastore.warehouse.dir. // We are expecting that the value of spark.sql.warehouse.dir will override the // value of hive.metastore.warehouse.dir. - val session = builder - .config("spark.sql.warehouse.dir", warehouseLocation.toString) - .config("hive.metastore.warehouse.dir", hiveWarehouseLocation.toString) - .getOrCreate() + val session = new TestHiveContext(new SparkContext(sparkConf + .set("spark.sql.warehouse.dir", warehouseLocation.toString) + .set("hive.metastore.warehouse.dir", hiveWarehouseLocation.toString))) + .sparkSession (session, warehouseLocation.toString) } @@ -801,3 +813,43 @@ object SPARK_14244 extends QueryTest { } } } + +object SPARK_18360 { + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .config("spark.ui.enabled", "false") + .enableHiveSupport().getOrCreate() + + val defaultDbLocation = spark.catalog.getDatabase("default").locationUri + assert(new Path(defaultDbLocation) == new Path(spark.sharedState.warehousePath)) + + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + + try { + val tableMeta = CatalogTable( + identifier = TableIdentifier("test_tbl", Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty, + schema = new StructType().add("i", "int"), + provider = Some(DDLUtils.HIVE_PROVIDER)) + + val newWarehousePath = Utils.createTempDir().getAbsolutePath + hiveClient.runSqlHive(s"SET hive.metastore.warehouse.dir=$newWarehousePath") + hiveClient.createTable(tableMeta, ignoreIfExists = false) + val rawTable = hiveClient.getTable("default", "test_tbl") + // Hive will use the value of `hive.metastore.warehouse.dir` to generate default table + // location for tables in default database. 
+ assert(rawTable.storage.locationUri.get.contains(newWarehousePath)) + hiveClient.dropTable("default", "test_tbl", ignoreIfNotExists = false, purge = false) + + spark.sharedState.externalCatalog.createTable(tableMeta, ignoreIfExists = false) + val readBack = spark.sharedState.externalCatalog.getTable("default", "test_tbl") + // Spark SQL will use the location of default database to generate default table + // location for tables in default database. + assert(readBack.storage.locationUri.get.contains(defaultDbLocation)) + } finally { + hiveClient.dropTable("default", "test_tbl", ignoreIfNotExists = true, purge = false) + hiveClient.runSqlHive(s"SET hive.metastore.warehouse.dir=$defaultDbLocation") + } + } +} From e8b1955e20a966da9a95f75320680cbab1096540 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 17 Nov 2016 18:45:15 -0800 Subject: [PATCH 0143/1204] [SPARK-18462] Fix ClassCastException in SparkListenerDriverAccumUpdates event ## What changes were proposed in this pull request? This patch fixes a `ClassCastException: java.lang.Integer cannot be cast to java.lang.Long` error which could occur in the HistoryServer while trying to process a deserialized `SparkListenerDriverAccumUpdates` event. The problem stems from how `jackson-module-scala` handles primitive type parameters (see https://github.com/FasterXML/jackson-module-scala/wiki/FAQ#deserializing-optionint-and-other-primitive-challenges for more details). This was causing a problem where our code expected a field to be deserialized as a `(Long, Long)` tuple but we got an `(Int, Int)` tuple instead. This patch hacks around this issue by registering a custom `Converter` with Jackson in order to deserialize the tuples as `(Object, Object)` and perform the appropriate casting. ## How was this patch tested? New regression tests in `SQLListenerSuite`. Author: Josh Rosen Closes #15922 from JoshRosen/SPARK-18462. (cherry picked from commit d9dd979d170f44383a9a87f892f2486ddb3cca7d) Signed-off-by: Reynold Xin --- .../spark/sql/execution/ui/SQLListener.scala | 39 +++++++++++++++- .../sql/execution/ui/SQLListenerSuite.scala | 44 ++++++++++++++++++- 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala index 60f13432d78d2..5daf21595d8a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala @@ -19,6 +19,11 @@ package org.apache.spark.sql.execution.ui import scala.collection.mutable +import com.fasterxml.jackson.databind.JavaType +import com.fasterxml.jackson.databind.`type`.TypeFactory +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import com.fasterxml.jackson.databind.util.Converter + import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging @@ -43,9 +48,41 @@ case class SparkListenerSQLExecutionEnd(executionId: Long, time: Long) extends SparkListenerEvent @DeveloperApi -case class SparkListenerDriverAccumUpdates(executionId: Long, accumUpdates: Seq[(Long, Long)]) +case class SparkListenerDriverAccumUpdates( + executionId: Long, + @JsonDeserialize(contentConverter = classOf[LongLongTupleConverter]) + accumUpdates: Seq[(Long, Long)]) extends SparkListenerEvent +/** + * Jackson [[Converter]] for converting an (Int, Int) tuple into a (Long, Long) tuple. 
+ * + * This is necessary due to limitations in how Jackson's scala module deserializes primitives; + * see the "Deserializing Option[Int] and other primitive challenges" section in + * https://github.com/FasterXML/jackson-module-scala/wiki/FAQ for a discussion of this issue and + * SPARK-18462 for the specific problem that motivated this conversion. + */ +private class LongLongTupleConverter extends Converter[(Object, Object), (Long, Long)] { + + override def convert(in: (Object, Object)): (Long, Long) = { + def toLong(a: Object): Long = a match { + case i: java.lang.Integer => i.intValue() + case l: java.lang.Long => l.longValue() + } + (toLong(in._1), toLong(in._2)) + } + + override def getInputType(typeFactory: TypeFactory): JavaType = { + val objectType = typeFactory.uncheckedSimpleType(classOf[Object]) + typeFactory.constructSimpleType(classOf[(_, _)], classOf[(_, _)], Array(objectType, objectType)) + } + + override def getOutputType(typeFactory: TypeFactory): JavaType = { + val longType = typeFactory.uncheckedSimpleType(classOf[Long]) + typeFactory.constructSimpleType(classOf[(_, _)], classOf[(_, _)], Array(longType, longType)) + } +} + class SQLHistoryListenerFactory extends SparkHistoryListenerFactory { override def createListeners(conf: SparkConf, sparkUI: SparkUI): Seq[SparkListener] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala index 19b6d2603129c..7b4ff675fba72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.ui import java.util.Properties +import org.json4s.jackson.JsonMethods._ import org.mockito.Mockito.mock import org.apache.spark._ @@ -35,10 +36,10 @@ import org.apache.spark.sql.execution.{LeafExecNode, QueryExecution, SparkPlanIn import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.ui.SparkUI -import org.apache.spark.util.{AccumulatorMetadata, LongAccumulator} +import org.apache.spark.util.{AccumulatorMetadata, JsonProtocol, LongAccumulator} -class SQLListenerSuite extends SparkFunSuite with SharedSQLContext { +class SQLListenerSuite extends SparkFunSuite with SharedSQLContext with JsonTestUtils { import testImplicits._ import org.apache.spark.AccumulatorSuite.makeInfo @@ -416,6 +417,45 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext { assert(driverUpdates(physicalPlan.longMetric("dummy").id) == expectedAccumValue) } + test("roundtripping SparkListenerDriverAccumUpdates through JsonProtocol (SPARK-18462)") { + val event = SparkListenerDriverAccumUpdates(1L, Seq((2L, 3L))) + val json = JsonProtocol.sparkEventToJson(event) + assertValidDataInJson(json, + parse(""" + |{ + | "Event": "org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates", + | "executionId": 1, + | "accumUpdates": [[2,3]] + |} + """.stripMargin)) + JsonProtocol.sparkEventFromJson(json) match { + case SparkListenerDriverAccumUpdates(executionId, accums) => + assert(executionId == 1L) + accums.foreach { case (a, b) => + assert(a == 2L) + assert(b == 3L) + } + } + + // Test a case where the numbers in the JSON can only fit in longs: + val longJson = parse( + """ + |{ + | "Event": "org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates", + | "executionId": 
4294967294, + | "accumUpdates": [[4294967294,3]] + |} + """.stripMargin) + JsonProtocol.sparkEventFromJson(longJson) match { + case SparkListenerDriverAccumUpdates(executionId, accums) => + assert(executionId == 4294967294L) + accums.foreach { case (a, b) => + assert(a == 4294967294L) + assert(b == 3L) + } + } + } + } From 5912c19e76719a1c388a7a151af03ebf71b8f0db Mon Sep 17 00:00:00 2001 From: Tyson Condie Date: Fri, 18 Nov 2016 11:11:24 -0800 Subject: [PATCH 0144/1204] [SPARK-18187][SQL] CompactibleFileStreamLog should not use "compactInterval" directly with user setting. ## What changes were proposed in this pull request? CompactibleFileStreamLog relies on "compactInterval" to detect a compaction batch. If the "compactInterval" is reset by the user, CompactibleFileStreamLog will return a wrong answer, resulting in data loss. This PR provides a way to check the validity of 'compactInterval' and calculate an appropriate value. ## How was this patch tested? When restarting a stream, we change the 'spark.sql.streaming.fileSource.log.compactInterval' to a value different from the former one. The primary solution to this issue was given by uncleGen. Added extensions include an additional metadata field in OffsetSeq and CompactibleFileStreamLog APIs. zsxwing Author: Tyson Condie Author: genmao.ygm Closes #15852 from tcondie/spark-18187. (cherry picked from commit 51baca2219fda8692b88fc8552548544aec73a1e) Signed-off-by: Shixiong Zhu --- .../streaming/CompactibleFileStreamLog.scala | 61 ++++++++++++++++++- .../streaming/FileStreamSinkLog.scala | 8 ++- .../streaming/FileStreamSourceLog.scala | 9 +-- .../execution/streaming/HDFSMetadataLog.scala | 2 +- .../sql/execution/streaming/OffsetSeq.scala | 12 +++- .../execution/streaming/OffsetSeqLog.scala | 31 +++++++--- .../CompactibleFileStreamLogSuite.scala | 33 ++++++++++ .../sql/streaming/FileStreamSourceSuite.scala | 41 ++++++++----- .../spark/sql/streaming/StreamTest.scala | 20 +++++- 9 files changed, 178 insertions(+), 39 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala index 8af3db1968882..8529ceac30f1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala @@ -63,7 +63,46 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( protected def isDeletingExpiredLog: Boolean - protected def compactInterval: Int + protected def defaultCompactInterval: Int + + protected final lazy val compactInterval: Int = { + // SPARK-18187: "compactInterval" can be set by user via defaultCompactInterval. + // If there are existing log entries, then we should ensure a compatible compactInterval + // is used, irrespective of the defaultCompactInterval. There are three cases: + // + // 1. If there is no '.compact' file, we can use the default setting directly. + // 2. If there are two or more '.compact' files, we use the interval of patch id suffix with + // '.compact' as compactInterval. This case could arise if isDeletingExpiredLog == false. + // 3.
If there is only one '.compact' file, then we must find a compact interval + // that is compatible with (i.e., a divisor of) the previous compact file, and that + // faithfully tries to represent the revised default compact interval i.e., is at least + // is large if possible. + // e.g., if defaultCompactInterval is 5 (and previous compact interval could have + // been any 2,3,4,6,12), then a log could be: 11.compact, 12, 13, in which case + // will ensure that the new compactInterval = 6 > 5 and (11 + 1) % 6 == 0 + val compactibleBatchIds = fileManager.list(metadataPath, batchFilesFilter) + .filter(f => f.getPath.toString.endsWith(CompactibleFileStreamLog.COMPACT_FILE_SUFFIX)) + .map(f => pathToBatchId(f.getPath)) + .sorted + .reverse + + // Case 1 + var interval = defaultCompactInterval + if (compactibleBatchIds.length >= 2) { + // Case 2 + val latestCompactBatchId = compactibleBatchIds(0) + val previousCompactBatchId = compactibleBatchIds(1) + interval = (latestCompactBatchId - previousCompactBatchId).toInt + } else if (compactibleBatchIds.length == 1) { + // Case 3 + interval = CompactibleFileStreamLog.deriveCompactInterval( + defaultCompactInterval, compactibleBatchIds(0).toInt) + } + assert(interval > 0, s"intervalValue = $interval not positive value.") + logInfo(s"Set the compact interval to $interval " + + s"[defaultCompactInterval: $defaultCompactInterval]") + interval + } /** * Filter out the obsolete logs. @@ -245,4 +284,24 @@ object CompactibleFileStreamLog { def nextCompactionBatchId(batchId: Long, compactInterval: Long): Long = { (batchId + compactInterval + 1) / compactInterval * compactInterval - 1 } + + /** + * Derives a compact interval from the latest compact batch id and + * a default compact interval. + */ + def deriveCompactInterval(defaultInterval: Int, latestCompactBatchId: Int) : Int = { + if (latestCompactBatchId + 1 <= defaultInterval) { + latestCompactBatchId + 1 + } else if (defaultInterval < (latestCompactBatchId + 1) / 2) { + // Find the first divisor >= default compact interval + def properDivisors(min: Int, n: Int) = + (min to n/2).view.filter(i => n % i == 0) :+ n + + properDivisors(defaultInterval, latestCompactBatchId + 1).head + } else { + // default compact interval > than any divisor other than latest compact id + latestCompactBatchId + 1 + } + } } + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala index b4f14151f1ef2..eb6eed87eca7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala @@ -88,9 +88,11 @@ class FileStreamSinkLog( protected override val isDeletingExpiredLog = sparkSession.sessionState.conf.fileSinkLogDeletion - protected override val compactInterval = sparkSession.sessionState.conf.fileSinkLogCompactInterval - require(compactInterval > 0, - s"Please set ${SQLConf.FILE_SINK_LOG_COMPACT_INTERVAL.key} (was $compactInterval) " + + protected override val defaultCompactInterval = + sparkSession.sessionState.conf.fileSinkLogCompactInterval + + require(defaultCompactInterval > 0, + s"Please set ${SQLConf.FILE_SINK_LOG_COMPACT_INTERVAL.key} (was $defaultCompactInterval) " + "to a positive value.") override def compactLogs(logs: Seq[SinkFileStatus]): Seq[SinkFileStatus] = { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala index fe81b15607068..327b3ac267766 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala @@ -38,11 +38,12 @@ class FileStreamSourceLog( import CompactibleFileStreamLog._ // Configurations about metadata compaction - protected override val compactInterval = + protected override val defaultCompactInterval: Int = sparkSession.sessionState.conf.fileSourceLogCompactInterval - require(compactInterval > 0, - s"Please set ${SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key} (was $compactInterval) to a " + - s"positive value.") + + require(defaultCompactInterval > 0, + s"Please set ${SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key} " + + s"(was $defaultCompactInterval) to a positive value.") protected override val fileCleanupDelayMs = sparkSession.sessionState.conf.fileSourceLogCleanupDelay diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index db7057d7da70c..080729b2ca8d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -70,7 +70,7 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: /** * A `PathFilter` to filter only batch files */ - private val batchFilesFilter = new PathFilter { + protected val batchFilesFilter = new PathFilter { override def accept(path: Path): Boolean = isBatchFile(path) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index a4e1fe6797097..7469caeee3be5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -23,7 +23,7 @@ package org.apache.spark.sql.execution.streaming * [[Source]]s that are present in a streaming query. This is similar to simplified, single-instance * vector clock that must progress linearly forward. */ -case class OffsetSeq(offsets: Seq[Option[Offset]]) { +case class OffsetSeq(offsets: Seq[Option[Offset]], metadata: Option[String] = None) { /** * Unpacks an offset into [[StreamProgress]] by associating each offset with the order list of @@ -47,7 +47,13 @@ object OffsetSeq { * Returns a [[OffsetSeq]] with a variable sequence of offsets. * `nulls` in the sequence are converted to `None`s. */ - def fill(offsets: Offset*): OffsetSeq = { - OffsetSeq(offsets.map(Option(_))) + def fill(offsets: Offset*): OffsetSeq = OffsetSeq.fill(None, offsets: _*) + + /** + * Returns a [[OffsetSeq]] with metadata and a variable sequence of offsets. + * `nulls` in the sequence are converted to `None`s. 
+ */ + def fill(metadata: Option[String], offsets: Offset*): OffsetSeq = { + OffsetSeq(offsets.map(Option(_)), metadata) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala index d1c9d95be9fdb..cc25b4474ba2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala @@ -33,12 +33,13 @@ import org.apache.spark.sql.SparkSession * by a newline character. If a source offset is missing, then * that line will contain a string value defined in the * SERIALIZED_VOID_OFFSET variable in [[OffsetSeqLog]] companion object. - * For instance, when dealine wiht [[LongOffset]] types: - * v1 // version 1 - * {0} // LongOffset 0 - * {3} // LongOffset 3 - * - // No offset for this source i.e., an invalid JSON string - * {2} // LongOffset 2 + * For instance, when dealing with [[LongOffset]] types: + * v1 // version 1 + * metadata + * {0} // LongOffset 0 + * {3} // LongOffset 3 + * - // No offset for this source i.e., an invalid JSON string + * {2} // LongOffset 2 * ... */ class OffsetSeqLog(sparkSession: SparkSession, path: String) @@ -58,13 +59,25 @@ class OffsetSeqLog(sparkSession: SparkSession, path: String) if (version != OffsetSeqLog.VERSION) { throw new IllegalStateException(s"Unknown log version: ${version}") } - OffsetSeq.fill(lines.map(parseOffset).toArray: _*) + + // read metadata + val metadata = lines.next().trim match { + case "" => None + case md => Some(md) + } + OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*) } - override protected def serialize(metadata: OffsetSeq, out: OutputStream): Unit = { + override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = { // called inside a try-finally where the underlying stream is closed in the caller out.write(OffsetSeqLog.VERSION.getBytes(UTF_8)) - metadata.offsets.map(_.map(_.json)).foreach { offset => + + // write metadata + out.write('\n') + out.write(offsetSeq.metadata.getOrElse("").getBytes(UTF_8)) + + // write offsets, one per line + offsetSeq.offsets.map(_.map(_.json)).foreach { offset => out.write('\n') offset match { case Some(json: String) => out.write(json.getBytes(UTF_8)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala new file mode 100644 index 0000000000000..2cd2157b293cb --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import org.apache.spark.SparkFunSuite + +class CompactibleFileStreamLogSuite extends SparkFunSuite { + + import CompactibleFileStreamLog._ + + test("deriveCompactInterval") { + // latestCompactBatchId(4) + 1 <= default(5) + // then use latestestCompactBatchId + 1 === 5 + assert(5 === deriveCompactInterval(5, 4)) + // First divisor of 10 greater than 4 === 5 + assert(5 === deriveCompactInterval(4, 9)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index b365af76c3795..a099153d2e58e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.streaming import java.io.File +import scala.collection.mutable + import org.scalatest.PrivateMethodTester import org.scalatest.time.SpanSugar._ @@ -896,32 +898,38 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } - test("compacat metadata log") { + test("compact interval metadata log") { val _sources = PrivateMethod[Seq[Source]]('sources) val _metadataLog = PrivateMethod[FileStreamSourceLog]('metadataLog) - def verify(execution: StreamExecution) - (batchId: Long, expectedBatches: Int): Boolean = { + def verify( + execution: StreamExecution, + batchId: Long, + expectedBatches: Int, + expectedCompactInterval: Int): Boolean = { import CompactibleFileStreamLog._ val fileSource = (execution invokePrivate _sources()).head.asInstanceOf[FileStreamSource] val metadataLog = fileSource invokePrivate _metadataLog() - if (isCompactionBatch(batchId, 2)) { + if (isCompactionBatch(batchId, expectedCompactInterval)) { val path = metadataLog.batchIdToPath(batchId) // Assert path name should be ended with compact suffix. - assert(path.getName.endsWith(COMPACT_FILE_SUFFIX)) + assert(path.getName.endsWith(COMPACT_FILE_SUFFIX), + "path does not end with compact file suffix") // Compacted batch should include all entries from start. 
val entries = metadataLog.get(batchId) - assert(entries.isDefined) - assert(entries.get.length === metadataLog.allFiles().length) - assert(metadataLog.get(None, Some(batchId)).flatMap(_._2).length === entries.get.length) + assert(entries.isDefined, "Entries not defined") + assert(entries.get.length === metadataLog.allFiles().length, "clean up check") + assert(metadataLog.get(None, Some(batchId)).flatMap(_._2).length === + entries.get.length, "Length check") } assert(metadataLog.allFiles().sortBy(_.batchId) === - metadataLog.get(None, Some(batchId)).flatMap(_._2).sortBy(_.batchId)) + metadataLog.get(None, Some(batchId)).flatMap(_._2).sortBy(_.batchId), + "Batch id mismatch") metadataLog.get(None, Some(batchId)).flatMap(_._2).length === expectedBatches } @@ -932,26 +940,27 @@ class FileStreamSourceSuite extends FileStreamSourceTest { ) { val fileStream = createFileStream("text", src.getCanonicalPath) val filtered = fileStream.filter($"value" contains "keep") + val updateConf = Map(SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key -> "5") testStream(filtered)( AddTextFileData("drop1\nkeep2\nkeep3", src, tmp), CheckAnswer("keep2", "keep3"), - AssertOnQuery(verify(_)(0L, 1)), + AssertOnQuery(verify(_, 0L, 1, 2)), AddTextFileData("drop4\nkeep5\nkeep6", src, tmp), CheckAnswer("keep2", "keep3", "keep5", "keep6"), - AssertOnQuery(verify(_)(1L, 2)), + AssertOnQuery(verify(_, 1L, 2, 2)), AddTextFileData("drop7\nkeep8\nkeep9", src, tmp), CheckAnswer("keep2", "keep3", "keep5", "keep6", "keep8", "keep9"), - AssertOnQuery(verify(_)(2L, 3)), + AssertOnQuery(verify(_, 2L, 3, 2)), StopStream, - StartStream(), - AssertOnQuery(verify(_)(2L, 3)), + StartStream(additionalConfs = updateConf), + AssertOnQuery(verify(_, 2L, 3, 2)), AddTextFileData("drop10\nkeep11", src, tmp), CheckAnswer("keep2", "keep3", "keep5", "keep6", "keep8", "keep9", "keep11"), - AssertOnQuery(verify(_)(3L, 4)), + AssertOnQuery(verify(_, 3L, 4, 2)), AddTextFileData("drop12\nkeep13", src, tmp), CheckAnswer("keep2", "keep3", "keep5", "keep6", "keep8", "keep9", "keep11", "keep13"), - AssertOnQuery(verify(_)(4L, 5)) + AssertOnQuery(verify(_, 4L, 5, 2)) ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 742833065144d..a6b2d4b9ab4c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -161,7 +161,8 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { /** Starts the stream, resuming if data has already been processed. It must not be running. */ case class StartStream( trigger: Trigger = ProcessingTime(0), - triggerClock: Clock = new SystemClock) + triggerClock: Clock = new SystemClock, + additionalConfs: Map[String, String] = Map.empty) extends StreamAction /** Advance the trigger clock's time manually. 
*/ @@ -240,6 +241,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { var lastStream: StreamExecution = null val awaiting = new mutable.HashMap[Int, Offset]() // source index -> offset to wait for val sink = new MemorySink(stream.schema, outputMode) + val resetConfValues = mutable.Map[String, Option[String]]() @volatile var streamDeathCause: Throwable = null @@ -330,7 +332,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { startedTest.foreach { action => logInfo(s"Processing test stream action: $action") action match { - case StartStream(trigger, triggerClock) => + case StartStream(trigger, triggerClock, additionalConfs) => verify(currentStream == null, "stream already running") verify(triggerClock.isInstanceOf[SystemClock] || triggerClock.isInstanceOf[StreamManualClock], @@ -338,6 +340,14 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { if (triggerClock.isInstanceOf[StreamManualClock]) { manualClockExpectedTime = triggerClock.asInstanceOf[StreamManualClock].getTimeMillis() } + + additionalConfs.foreach(pair => { + val value = + if (spark.conf.contains(pair._1)) Some(spark.conf.get(pair._1)) else None + resetConfValues(pair._1) = value + spark.conf.set(pair._1, pair._2) + }) + lastStream = currentStream currentStream = spark @@ -519,6 +529,12 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { currentStream.stop() } spark.streams.removeListener(statusCollector) + + // Rollback prev configuration values + resetConfValues.foreach { + case (key, Some(value)) => spark.conf.set(key, value) + case (key, None) => spark.conf.unset(key) + } } } From ec622eb7e1ffd0775c9ca4683d1032ca8d41654a Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Fri, 18 Nov 2016 11:19:49 -0800 Subject: [PATCH 0145/1204] [SPARK-18457][SQL] ORC and other columnar formats using HiveShim read all columns when doing a simple count ## What changes were proposed in this pull request? When reading zero columns (e.g., count(*)) from ORC or any other format that uses HiveShim, actually set the read column list to empty for Hive to use. ## How was this patch tested? Query correctness is handled by existing unit tests. I'm happy to add more if anyone can point out some case that is not covered. Reduction in data read can be verified in the UI when built with a recent version of Hadoop say: ``` build/mvn -Pyarn -Phadoop-2.7 -Dhadoop.version=2.7.0 -Phive -DskipTests clean package ``` However the default Hadoop 2.2 that is used for unit tests does not report actual bytes read and instead just full file sizes (see FileScanRDD.scala line 80). Therefore I don't think there is a good way to add a unit test for this. I tested with the following setup using above build options ``` case class OrcData(intField: Long, stringField: String) spark.range(1,1000000).map(i => OrcData(i, s"part-$i")).toDF().write.format("orc").save("orc_test") sql( s"""CREATE EXTERNAL TABLE orc_test( | intField LONG, | stringField STRING |) |STORED AS ORC |LOCATION '${System.getProperty("user.dir") + "/orc_test"}' """.stripMargin) ``` ## Results query | Spark 2.0.2 | this PR ---|---|--- `sql("select count(*) from orc_test").collect`|4.4 MB|199.4 KB `sql("select intField from orc_test").collect`|743.4 KB|743.4 KB `sql("select * from orc_test").collect`|4.4 MB|4.4 MB Author: Andrew Ray Closes #15898 from aray/sql-orc-no-col. 
(cherry picked from commit 795e9fc9213cb9941ae131aadcafddb94bde5f74) Signed-off-by: Reynold Xin --- .../org/apache/spark/sql/hive/HiveShim.scala | 6 ++--- .../spark/sql/hive/orc/OrcQuerySuite.scala | 25 ++++++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 0d2a765a388aa..9e9894803ce25 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -69,13 +69,13 @@ private[hive] object HiveShim { } /* - * Cannot use ColumnProjectionUtils.appendReadColumns directly, if ids is null or empty + * Cannot use ColumnProjectionUtils.appendReadColumns directly, if ids is null */ def appendReadColumns(conf: Configuration, ids: Seq[Integer], names: Seq[String]) { - if (ids != null && ids.nonEmpty) { + if (ids != null) { ColumnProjectionUtils.appendReadColumns(conf, ids.asJava) } - if (names != null && names.nonEmpty) { + if (names != null) { appendReadColumnNames(conf, names) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala index ecb5972984523..a628977af2f4e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql.hive.orc import java.nio.charset.StandardCharsets import java.sql.Timestamp +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.ql.io.orc.{OrcStruct, SparkOrcNewRecordReader} import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.datasources.{LogicalRelation, RecordReaderIterator} import org.apache.spark.sql.hive.{HiveUtils, MetastoreRelation} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ @@ -577,4 +579,25 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { assert(spark.table(tableName).schema == schema.copy(fields = expectedFields)) } } + + test("Empty schema does not read data from ORC file") { + val data = Seq((1, 1), (2, 2)) + withOrcFile(data) { path => + val requestedSchema = StructType(Nil) + val conf = new Configuration() + val physicalSchema = OrcFileOperator.readSchema(Seq(path), Some(conf)).get + OrcRelation.setRequiredColumns(conf, physicalSchema, requestedSchema) + val maybeOrcReader = OrcFileOperator.getFileReader(path, Some(conf)) + assert(maybeOrcReader.isDefined) + val orcRecordReader = new SparkOrcNewRecordReader( + maybeOrcReader.get, conf, 0, maybeOrcReader.get.getContentLength) + + val recordsIterator = new RecordReaderIterator[OrcStruct](orcRecordReader) + try { + assert(recordsIterator.next().toString == "{null, null}") + } finally { + recordsIterator.close() + } + } + } } From 6717981e4d76f0794a75c60586de4677c49659ad Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 18 Nov 2016 21:45:18 +0000 Subject: [PATCH 0146/1204] [SPARK-18422][CORE] Fix wholeTextFiles test to pass on Windows in JavaAPISuite ## What changes were proposed in this pull request? This PR fixes the test `wholeTextFiles` in `JavaAPISuite.java`. 
This is failed due to the different path format on Windows. For example, the path in `container` was ``` C:\projects\spark\target\tmp\1478967560189-0/part-00000 ``` whereas `new URI(res._1()).getPath()` was as below: ``` /C:/projects/spark/target/tmp/1478967560189-0/part-00000 ``` ## How was this patch tested? Tests in `JavaAPISuite.java`. Tested via AppVeyor. **Before** Build: https://ci.appveyor.com/project/spark-test/spark/build/63-JavaAPISuite-1 Diff: https://github.com/apache/spark/compare/master...spark-test:JavaAPISuite-1 ``` [info] Test org.apache.spark.JavaAPISuite.wholeTextFiles started [error] Test org.apache.spark.JavaAPISuite.wholeTextFiles failed: java.lang.AssertionError: expected: but was:, took 0.578 sec [error] at org.apache.spark.JavaAPISuite.wholeTextFiles(JavaAPISuite.java:1089) ... ``` **After** Build started: [CORE] `org.apache.spark.JavaAPISuite` [![PR-15866](https://ci.appveyor.com/api/projects/status/github/spark-test/spark?branch=198DDA52-F201-4D2B-BE2F-244E0C1725B2&svg=true)](https://ci.appveyor.com/project/spark-test/spark/branch/198DDA52-F201-4D2B-BE2F-244E0C1725B2) Diff: https://github.com/apache/spark/compare/master...spark-test:198DDA52-F201-4D2B-BE2F-244E0C1725B2 ``` [info] Test org.apache.spark.JavaAPISuite.wholeTextFiles started ... ``` Author: hyukjinkwon Closes #15866 from HyukjinKwon/SPARK-18422. (cherry picked from commit 40d59ff5eaac6df237fe3d50186695c3806b268c) Signed-off-by: Sean Owen --- .../java/org/apache/spark/JavaAPISuite.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index 533025ba83e72..7bebe0612f9a8 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -20,7 +20,6 @@ import java.io.*; import java.nio.channels.FileChannel; import java.nio.ByteBuffer; -import java.net.URI; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; @@ -46,6 +45,7 @@ import com.google.common.collect.Lists; import com.google.common.base.Throwables; import com.google.common.io.Files; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.DefaultCodec; @@ -1075,18 +1075,23 @@ public void wholeTextFiles() throws Exception { byte[] content2 = "spark is also easy to use.\n".getBytes(StandardCharsets.UTF_8); String tempDirName = tempDir.getAbsolutePath(); - Files.write(content1, new File(tempDirName + "/part-00000")); - Files.write(content2, new File(tempDirName + "/part-00001")); + String path1 = new Path(tempDirName, "part-00000").toUri().getPath(); + String path2 = new Path(tempDirName, "part-00001").toUri().getPath(); + + Files.write(content1, new File(path1)); + Files.write(content2, new File(path2)); Map container = new HashMap<>(); - container.put(tempDirName+"/part-00000", new Text(content1).toString()); - container.put(tempDirName+"/part-00001", new Text(content2).toString()); + container.put(path1, new Text(content1).toString()); + container.put(path2, new Text(content2).toString()); JavaPairRDD readRDD = sc.wholeTextFiles(tempDirName, 3); List> result = readRDD.collect(); for (Tuple2 res : result) { - assertEquals(res._2(), container.get(new URI(res._1()).getPath())); + // Note that the paths from `wholeTextFiles` are in URI format on Windows, + // for example, file:/C:/a/b/c. 
+ assertEquals(res._2(), container.get(new Path(res._1()).toUri().getPath())); } } From 136f687c6282c328c2ae121fc3d45207550d184b Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Fri, 18 Nov 2016 16:13:02 -0800 Subject: [PATCH 0147/1204] [SPARK-18477][SS] Enable interrupts for HDFS in HDFSMetadataLog ## What changes were proposed in this pull request? HDFS `write` may just hang until timeout if some network error happens. It's better to enable interrupts to allow stopping the query fast on HDFS. This PR just changes the logic to only disable interrupts for local file system, as HADOOP-10622 only happens for local file system. ## How was this patch tested? Jenkins Author: Shixiong Zhu Closes #15911 from zsxwing/interrupt-on-dfs. (cherry picked from commit e5f5c29e021d504284fe5ad1a77dcd5a992ac10a) Signed-off-by: Tathagata Das --- .../execution/streaming/HDFSMetadataLog.scala | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 080729b2ca8d6..d95ec7f67feb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -105,25 +105,34 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: /** * Store the metadata for the specified batchId and return `true` if successful. If the batchId's * metadata has already been stored, this method will return `false`. - * - * Note that this method must be called on a [[org.apache.spark.util.UninterruptibleThread]] - * so that interrupts can be disabled while writing the batch file. This is because there is a - * potential dead-lock in Hadoop "Shell.runCommand" before 2.5.0 (HADOOP-10622). If the thread - * running "Shell.runCommand" is interrupted, then the thread can get deadlocked. In our - * case, `writeBatch` creates a file using HDFS API and calls "Shell.runCommand" to set the - * file permissions, and can get deadlocked if the stream execution thread is stopped by - * interrupt. Hence, we make sure that this method is called on [[UninterruptibleThread]] which - * allows us to disable interrupts here. Also see SPARK-14131. */ override def add(batchId: Long, metadata: T): Boolean = { get(batchId).map(_ => false).getOrElse { // Only write metadata when the batch has not yet been written - Thread.currentThread match { - case ut: UninterruptibleThread => - ut.runUninterruptibly { writeBatch(batchId, metadata, serialize) } - case _ => - throw new IllegalStateException( - "HDFSMetadataLog.add() must be executed on a o.a.spark.util.UninterruptibleThread") + if (fileManager.isLocalFileSystem) { + Thread.currentThread match { + case ut: UninterruptibleThread => + // When using a local file system, "writeBatch" must be called on a + // [[org.apache.spark.util.UninterruptibleThread]] so that interrupts can be disabled + // while writing the batch file. This is because there is a potential dead-lock in + // Hadoop "Shell.runCommand" before 2.5.0 (HADOOP-10622). If the thread running + // "Shell.runCommand" is interrupted, then the thread can get deadlocked. 
In our case, + // `writeBatch` creates a file using HDFS API and will call "Shell.runCommand" to set + // the file permission if using the local file system, and can get deadlocked if the + // stream execution thread is stopped by interrupt. Hence, we make sure that + // "writeBatch" is called on [[UninterruptibleThread]] which allows us to disable + // interrupts here. Also see SPARK-14131. + ut.runUninterruptibly { writeBatch(batchId, metadata, serialize) } + case _ => + throw new IllegalStateException( + "HDFSMetadataLog.add() on a local file system must be executed on " + + "a o.a.spark.util.UninterruptibleThread") + } + } else { + // For a distributed file system, such as HDFS or S3, if the network is broken, write + // operations may just hang until timeout. We should enable interrupts to allow stopping + // the query fast. + writeBatch(batchId, metadata, serialize) } true } @@ -298,6 +307,9 @@ object HDFSMetadataLog { /** Recursively delete a path if it exists. Should not throw exception if file doesn't exist. */ def delete(path: Path): Unit + + /** Whether the file systme is a local FS. */ + def isLocalFileSystem: Boolean } /** @@ -342,6 +354,13 @@ object HDFSMetadataLog { // ignore if file has already been deleted } } + + override def isLocalFileSystem: Boolean = fc.getDefaultFileSystem match { + case _: local.LocalFs | _: local.RawLocalFs => + // LocalFs = RawLocalFs + ChecksumFs + true + case _ => false + } } /** @@ -398,5 +417,12 @@ object HDFSMetadataLog { // ignore if file has already been deleted } } + + override def isLocalFileSystem: Boolean = fs match { + case _: LocalFileSystem | _: RawLocalFileSystem => + // LocalFileSystem = RawLocalFileSystem + ChecksumFileSystem + true + case _ => false + } } } From 4b1df0e89badd9bb175673aefc96d3f9358e976d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Fri, 18 Nov 2016 16:34:11 -0800 Subject: [PATCH 0148/1204] [SPARK-18505][SQL] Simplify AnalyzeColumnCommand ## What changes were proposed in this pull request? I'm spending more time at the design & code level for cost-based optimizer now, and have found a number of issues related to maintainability and compatibility that I will like to address. This is a small pull request to clean up AnalyzeColumnCommand: 1. Removed warning on duplicated columns. Warnings in log messages are useless since most users that run SQL don't see them. 2. Removed the nested updateStats function, by just inlining the function. 3. Renamed a few functions to better reflect what they do. 4. Removed the factory apply method for ColumnStatStruct. It is a bad pattern to use a apply method that returns an instantiation of a class that is not of the same type (ColumnStatStruct.apply used to return CreateNamedStruct). 5. Renamed ColumnStatStruct to just AnalyzeColumnCommand. 6. Added more documentation explaining some of the non-obvious return types and code blocks. In follow-up pull requests, I'd like to address the following: 1. Get rid of the Map[String, ColumnStat] map, since internally we should be using Attribute to reference columns, rather than strings. 2. Decouple the fields exposed by ColumnStat and internals of Spark SQL's execution path. Currently the two are coupled because ColumnStat takes in an InternalRow. 3. Correctness: Remove code path that stores statistics in the catalog using the base64 encoding of the UnsafeRow format, which is not stable across Spark versions. 4. Clearly document the data representation stored in the catalog for statistics. ## How was this patch tested? 
Affected test cases have been updated. Author: Reynold Xin Closes #15933 from rxin/SPARK-18505. (cherry picked from commit 6f7ff75091154fed7649ea6d79e887aad9fbde6a) Signed-off-by: Reynold Xin --- .../command/AnalyzeColumnCommand.scala | 115 ++++++++++-------- .../spark/sql/StatisticsColumnSuite.scala | 2 +- .../org/apache/spark/sql/StatisticsTest.scala | 7 +- .../spark/sql/hive/HiveExternalCatalog.scala | 4 +- .../sql/hive/client/HiveClientImpl.scala | 2 +- 5 files changed, 74 insertions(+), 56 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 6141fab4aff0d..7fc57d09e9243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.execution.command -import scala.collection.mutable - +import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases @@ -44,13 +43,16 @@ case class AnalyzeColumnCommand( val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db)) val relation = EliminateSubqueryAliases(sessionState.catalog.lookupRelation(tableIdentWithDB)) - relation match { + // Compute total size + val (catalogTable: CatalogTable, sizeInBytes: Long) = relation match { case catalogRel: CatalogRelation => - updateStats(catalogRel.catalogTable, + // This is a Hive serde format table + (catalogRel.catalogTable, AnalyzeTableCommand.calculateTotalSize(sessionState, catalogRel.catalogTable)) case logicalRel: LogicalRelation if logicalRel.catalogTable.isDefined => - updateStats(logicalRel.catalogTable.get, + // This is a data source format table + (logicalRel.catalogTable.get, AnalyzeTableCommand.calculateTotalSize(sessionState, logicalRel.catalogTable.get)) case otherRelation => @@ -58,45 +60,45 @@ case class AnalyzeColumnCommand( s"${otherRelation.nodeName}.") } - def updateStats(catalogTable: CatalogTable, newTotalSize: Long): Unit = { - val (rowCount, columnStats) = computeColStats(sparkSession, relation) - // We also update table-level stats in order to keep them consistent with column-level stats. - val statistics = Statistics( - sizeInBytes = newTotalSize, - rowCount = Some(rowCount), - // Newly computed column stats should override the existing ones. - colStats = catalogTable.stats.map(_.colStats).getOrElse(Map()) ++ columnStats) - sessionState.catalog.alterTable(catalogTable.copy(stats = Some(statistics))) - // Refresh the cached data source table in the catalog. - sessionState.catalog.refreshTable(tableIdentWithDB) - } + // Compute stats for each column + val (rowCount, newColStats) = + AnalyzeColumnCommand.computeColStats(sparkSession, relation, columnNames) + + // We also update table-level stats in order to keep them consistent with column-level stats. + val statistics = Statistics( + sizeInBytes = sizeInBytes, + rowCount = Some(rowCount), + // Newly computed column stats should override the existing ones. + colStats = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) ++ newColStats) + + sessionState.catalog.alterTable(catalogTable.copy(stats = Some(statistics))) + + // Refresh the cached data source table in the catalog. 
+ sessionState.catalog.refreshTable(tableIdentWithDB) Seq.empty[Row] } +} +object AnalyzeColumnCommand extends Logging { + + /** + * Compute stats for the given columns. + * @return (row count, map from column name to ColumnStats) + * + * This is visible for testing. + */ def computeColStats( sparkSession: SparkSession, - relation: LogicalPlan): (Long, Map[String, ColumnStat]) = { + relation: LogicalPlan, + columnNames: Seq[String]): (Long, Map[String, ColumnStat]) = { - // check correctness of column names - val attributesToAnalyze = mutable.MutableList[Attribute]() - val duplicatedColumns = mutable.MutableList[String]() + // Resolve the column names and dedup using AttributeSet val resolver = sparkSession.sessionState.conf.resolver - columnNames.foreach { col => + val attributesToAnalyze = AttributeSet(columnNames.map { col => val exprOption = relation.output.find(attr => resolver(attr.name, col)) - val expr = exprOption.getOrElse(throw new AnalysisException(s"Invalid column name: $col.")) - // do deduplication - if (!attributesToAnalyze.contains(expr)) { - attributesToAnalyze += expr - } else { - duplicatedColumns += col - } - } - if (duplicatedColumns.nonEmpty) { - logWarning("Duplicate column names were deduplicated in `ANALYZE TABLE` statement. " + - s"Input columns: ${columnNames.mkString("(", ", ", ")")}. " + - s"Duplicate columns: ${duplicatedColumns.mkString("(", ", ", ")")}.") - } + exprOption.getOrElse(throw new AnalysisException(s"Invalid column name: $col.")) + }).toSeq // Collect statistics per column. // The first element in the result will be the overall row count, the following elements @@ -104,22 +106,21 @@ case class AnalyzeColumnCommand( // The layout of each struct follows the layout of the ColumnStats. val ndvMaxErr = sparkSession.sessionState.conf.ndvMaxError val expressions = Count(Literal(1)).toAggregateExpression() +: - attributesToAnalyze.map(ColumnStatStruct(_, ndvMaxErr)) + attributesToAnalyze.map(AnalyzeColumnCommand.createColumnStatStruct(_, ndvMaxErr)) val namedExpressions = expressions.map(e => Alias(e, e.toString)()) val statsRow = Dataset.ofRows(sparkSession, Aggregate(Nil, namedExpressions, relation)) .queryExecution.toRdd.collect().head // unwrap the result + // TODO: Get rid of numFields by using the public Dataset API. val rowCount = statsRow.getLong(0) val columnStats = attributesToAnalyze.zipWithIndex.map { case (expr, i) => - val numFields = ColumnStatStruct.numStatFields(expr.dataType) + val numFields = AnalyzeColumnCommand.numStatFields(expr.dataType) (expr.name, ColumnStat(statsRow.getStruct(i + 1, numFields))) }.toMap (rowCount, columnStats) } -} -object ColumnStatStruct { private val zero = Literal(0, LongType) private val one = Literal(1, LongType) @@ -137,7 +138,11 @@ object ColumnStatStruct { private def numTrues(e: Expression): Expression = Sum(If(e, one, zero)) private def numFalses(e: Expression): Expression = Sum(If(Not(e), one, zero)) - private def getStruct(exprs: Seq[Expression]): CreateNamedStruct = { + /** + * Creates a struct that groups the sequence of expressions together. This is used to create + * one top level struct per column. + */ + private def createStruct(exprs: Seq[Expression]): CreateNamedStruct = { CreateStruct(exprs.map { expr: Expression => expr.transformUp { case af: AggregateFunction => af.toAggregateExpression() @@ -161,6 +166,7 @@ object ColumnStatStruct { Seq(numNulls(e), numTrues(e), numFalses(e)) } + // TODO(rxin): Get rid of this function. 
def numStatFields(dataType: DataType): Int = { dataType match { case BinaryType | BooleanType => 3 @@ -168,14 +174,25 @@ object ColumnStatStruct { } } - def apply(attr: Attribute, relativeSD: Double): CreateNamedStruct = attr.dataType match { - // Use aggregate functions to compute statistics we need. - case _: NumericType | TimestampType | DateType => getStruct(numericColumnStat(attr, relativeSD)) - case StringType => getStruct(stringColumnStat(attr, relativeSD)) - case BinaryType => getStruct(binaryColumnStat(attr)) - case BooleanType => getStruct(booleanColumnStat(attr)) - case otherType => - throw new AnalysisException("Analyzing columns is not supported for column " + - s"${attr.name} of data type: ${attr.dataType}.") + /** + * Creates a struct expression that contains the statistics to collect for a column. + * + * @param attr column to collect statistics + * @param relativeSD relative error for approximate number of distinct values. + */ + def createColumnStatStruct(attr: Attribute, relativeSD: Double): CreateNamedStruct = { + attr.dataType match { + case _: NumericType | TimestampType | DateType => + createStruct(numericColumnStat(attr, relativeSD)) + case StringType => + createStruct(stringColumnStat(attr, relativeSD)) + case BinaryType => + createStruct(binaryColumnStat(attr)) + case BooleanType => + createStruct(booleanColumnStat(attr)) + case otherType => + throw new AnalysisException("Analyzing columns is not supported for column " + + s"${attr.name} of data type: ${attr.dataType}.") + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala index f1a201abd8da6..e866ac2cb3b34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala @@ -79,7 +79,7 @@ class StatisticsColumnSuite extends StatisticsTest { val tableIdent = TableIdentifier(table, Some("default")) val relation = spark.sessionState.catalog.lookupRelation(tableIdent) val (_, columnStats) = - AnalyzeColumnCommand(tableIdent, columnsToAnalyze).computeColStats(spark, relation) + AnalyzeColumnCommand.computeColStats(spark, relation, columnsToAnalyze) assert(columnStats.contains(colName1)) assert(columnStats.contains(colName2)) // check deduplication diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala index 5134ac0e7e5b3..915ee0d31bca2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala @@ -19,11 +19,12 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} -import org.apache.spark.sql.execution.command.{AnalyzeColumnCommand, ColumnStatStruct} +import org.apache.spark.sql.execution.command.AnalyzeColumnCommand import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ + trait StatisticsTest extends QueryTest with SharedSQLContext { def checkColStats( @@ -36,7 +37,7 @@ trait StatisticsTest extends QueryTest with SharedSQLContext { val tableIdent = TableIdentifier(table, Some("default")) val relation = spark.sessionState.catalog.lookupRelation(tableIdent) val (_, columnStats) = - AnalyzeColumnCommand(tableIdent, 
columns.map(_.name)).computeColStats(spark, relation) + AnalyzeColumnCommand.computeColStats(spark, relation, columns.map(_.name)) expectedColStatsSeq.foreach { case (field, expectedColStat) => assert(columnStats.contains(field.name)) val colStat = columnStats(field.name) @@ -48,7 +49,7 @@ trait StatisticsTest extends QueryTest with SharedSQLContext { // check if we get the same colStat after encoding and decoding val encodedCS = colStat.toString - val numFields = ColumnStatStruct.numStatFields(field.dataType) + val numFields = AnalyzeColumnCommand.numStatFields(field.dataType) val decodedCS = ColumnStat(numFields, encodedCS) StatisticsTest.checkColStat( dataType = field.dataType, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index cacffcf33c263..5dbb4024bbee0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.execution.command.{ColumnStatStruct, DDLUtils} +import org.apache.spark.sql.execution.command.{AnalyzeColumnCommand, DDLUtils} import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.internal.StaticSQLConf._ @@ -634,7 +634,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat .map { case (k, v) => (k.drop(STATISTICS_COL_STATS_PREFIX.length), v) } val colStats: Map[String, ColumnStat] = tableWithSchema.schema.collect { case f if colStatsProps.contains(f.name) => - val numFields = ColumnStatStruct.numStatFields(f.dataType) + val numFields = AnalyzeColumnCommand.numStatFields(f.dataType) (f.name, ColumnStat(numFields, colStatsProps(f.name))) }.toMap tableWithSchema.copy( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 2bf9a26b0b7fc..daae8523c6366 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -97,7 +97,7 @@ private[hive] class HiveClientImpl( } // Create an internal session state for this HiveClientImpl. - val state = { + val state: SessionState = { val original = Thread.currentThread().getContextClassLoader // Switch to the initClassLoader. Thread.currentThread().setContextClassLoader(initClassLoader) From b4bad04c5e20b06992100c1d44ece9d3a5b4f817 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Fri, 18 Nov 2016 16:34:38 -0800 Subject: [PATCH 0149/1204] [SPARK-18497][SS] Make ForeachSink support watermark ## What changes were proposed in this pull request? The issue in ForeachSink is the new created DataSet still uses the old QueryExecution. When `foreachPartition` is called, `QueryExecution.toString` will be called and then fail because it doesn't know how to plan EventTimeWatermark. This PR just replaces the QueryExecution with IncrementalExecution to fix the issue. ## How was this patch tested? `test("foreach with watermark")`. Author: Shixiong Zhu Closes #15934 from zsxwing/SPARK-18497. 
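The test added below exercises this through the streaming test harness. For reference, a roughly equivalent standalone program might look like the following sketch, where the socket source, host and port, and the trivial ForeachWriter are placeholder assumptions for illustration rather than anything in the patch:

```scala
import org.apache.spark.sql.{ForeachWriter, Row, SparkSession}
import org.apache.spark.sql.functions.{count, window}
import org.apache.spark.sql.streaming.OutputMode

object ForeachWithWatermarkSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("foreach-watermark").getOrCreate()
    import spark.implicits._

    // Input lines are assumed to be epoch seconds; cast them into an event-time column.
    val counts = spark.readStream
      .format("socket").option("host", "localhost").option("port", "9999").load()
      .withColumn("eventTime", $"value".cast("long").cast("timestamp"))
      .withWatermark("eventTime", "10 seconds")
      .groupBy(window($"eventTime", "5 seconds"))
      .agg(count("*").as("count"))

    // Before this fix, planning the foreach sink's Dataset failed on EventTimeWatermark;
    // with it, the sink reuses the batch's IncrementalExecution and the query runs.
    val query = counts.writeStream
      .outputMode(OutputMode.Complete)
      .foreach(new ForeachWriter[Row] {
        def open(partitionId: Long, version: Long): Boolean = true
        def process(row: Row): Unit = println(row)
        def close(errorCause: Throwable): Unit = ()
      })
      .start()

    query.awaitTermination()
  }
}
```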
(cherry picked from commit 2a40de408b5eb47edba92f9fe92a42ed1e78bf98) Signed-off-by: Tathagata Das --- .../sql/execution/streaming/ForeachSink.scala | 16 ++++----- .../streaming/ForeachSinkSuite.scala | 35 +++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala index f5c550dd6ac3a..c93fcfb77cc93 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ForeachSink.scala @@ -47,22 +47,22 @@ class ForeachSink[T : Encoder](writer: ForeachWriter[T]) extends Sink with Seria // method supporting incremental planning. But in the long run, we should generally make newly // created Datasets use `IncrementalExecution` where necessary (which is SPARK-16264 tries to // resolve). - + val incrementalExecution = data.queryExecution.asInstanceOf[IncrementalExecution] val datasetWithIncrementalExecution = - new Dataset(data.sparkSession, data.logicalPlan, implicitly[Encoder[T]]) { + new Dataset(data.sparkSession, incrementalExecution, implicitly[Encoder[T]]) { override lazy val rdd: RDD[T] = { val objectType = exprEnc.deserializer.dataType val deserialized = CatalystSerde.deserialize[T](logicalPlan) // was originally: sparkSession.sessionState.executePlan(deserialized) ... - val incrementalExecution = new IncrementalExecution( + val newIncrementalExecution = new IncrementalExecution( this.sparkSession, deserialized, - data.queryExecution.asInstanceOf[IncrementalExecution].outputMode, - data.queryExecution.asInstanceOf[IncrementalExecution].checkpointLocation, - data.queryExecution.asInstanceOf[IncrementalExecution].currentBatchId, - data.queryExecution.asInstanceOf[IncrementalExecution].currentEventTimeWatermark) - incrementalExecution.toRdd.mapPartitions { rows => + incrementalExecution.outputMode, + incrementalExecution.checkpointLocation, + incrementalExecution.currentBatchId, + incrementalExecution.currentEventTimeWatermark) + newIncrementalExecution.toRdd.mapPartitions { rows => rows.map(_.get(0, objectType)) }.asInstanceOf[RDD[T]] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/ForeachSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/ForeachSinkSuite.scala index 9e059216110f2..ee6261036fdd0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/ForeachSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/ForeachSinkSuite.scala @@ -25,6 +25,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.SparkException import org.apache.spark.sql.ForeachWriter +import org.apache.spark.sql.functions.{count, window} import org.apache.spark.sql.streaming.{OutputMode, StreamingQueryException, StreamTest} import org.apache.spark.sql.test.SharedSQLContext @@ -169,6 +170,40 @@ class ForeachSinkSuite extends StreamTest with SharedSQLContext with BeforeAndAf assert(errorEvent.error.get.getMessage === "error") } } + + test("foreach with watermark") { + val inputData = MemoryStream[Int] + + val windowedAggregation = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"count".as[Long]) + .map(_.toInt) + .repartition(1) + + val query = 
windowedAggregation + .writeStream + .outputMode(OutputMode.Complete) + .foreach(new TestForeachWriter()) + .start() + try { + inputData.addData(10, 11, 12) + query.processAllAvailable() + + val allEvents = ForeachSinkSuite.allEvents() + assert(allEvents.size === 1) + val expectedEvents = Seq( + ForeachSinkSuite.Open(partition = 0, version = 0), + ForeachSinkSuite.Process(value = 3), + ForeachSinkSuite.Close(None) + ) + assert(allEvents === Seq(expectedEvents)) + } finally { + query.stop() + } + } } /** A global object to collect events in the executor */ From 693401be24bfefe5305038b87888cdeb641d7642 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 19 Nov 2016 09:00:11 +0000 Subject: [PATCH 0150/1204] [SPARK-18448][CORE] SparkSession should implement java.lang.AutoCloseable like JavaSparkContext ## What changes were proposed in this pull request? Just adds `close()` + `Closeable` as a synonym for `stop()`. This makes it usable in Java in try-with-resources, as suggested by ash211 (`Closeable` extends `AutoCloseable` BTW) ## How was this patch tested? Existing tests Author: Sean Owen Closes #15932 from srowen/SPARK-18448. (cherry picked from commit db9fb9baacbf8640dd37a507b7450db727c7e6ea) Signed-off-by: Sean Owen --- .../main/scala/org/apache/spark/sql/SparkSession.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 3045eb69f427f..58b2ab3957173 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql import java.beans.Introspector +import java.io.Closeable import java.util.concurrent.atomic.AtomicReference import scala.collection.JavaConverters._ @@ -72,7 +73,7 @@ import org.apache.spark.util.Utils class SparkSession private( @transient val sparkContext: SparkContext, @transient private val existingSharedState: Option[SharedState]) - extends Serializable with Logging { self => + extends Serializable with Closeable with Logging { self => private[sql] def this(sc: SparkContext) { this(sc, None) @@ -647,6 +648,13 @@ class SparkSession private( sparkContext.stop() } + /** + * Synonym for `stop()`. + * + * @since 2.2.0 + */ + override def close(): Unit = stop() + /** * Parses the data type in our internal string representation. The data type string should * have the same format as the one generated by `toString` in scala. From 4b396a6545ec0f1e31b0e211228f04bdc5660300 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sat, 19 Nov 2016 11:24:15 +0000 Subject: [PATCH 0151/1204] [SPARK-18445][BUILD][DOCS] Fix the markdown for `Note:`/`NOTE:`/`Note that`/`'''Note:'''` across Scala/Java API documentation It seems in Scala/Java, - `Note:` - `NOTE:` - `Note that` - `'''Note:'''` - `note` This PR proposes to fix those to `note` to be consistent. 
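As a concrete before/after illustration (wording adapted from the Partitioner hunk further down in this patch), a free-text marker inside the comment body becomes a Scaladoc `@note` tag, so the caveat is rendered as its own section in the generated API docs instead of blending into the description text:

```scala
// Before: the caveat is plain text inside the description.
/**
 * A [[org.apache.spark.Partitioner]] that partitions sortable records by range.
 *
 * Note that the actual number of partitions created by the RangePartitioner might not
 * be the same as the `partitions` parameter.
 */

// After: the caveat is a first-class Scaladoc tag.
/**
 * A [[org.apache.spark.Partitioner]] that partitions sortable records by range.
 *
 * @note The actual number of partitions created by the RangePartitioner might not
 * be the same as the `partitions` parameter.
 */
```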
**Before** - Scala ![2016-11-17 6 16 39](https://cloud.githubusercontent.com/assets/6477701/20383180/1a7aed8c-acf2-11e6-9611-5eaf6d52c2e0.png) - Java ![2016-11-17 6 14 41](https://cloud.githubusercontent.com/assets/6477701/20383096/c8ffc680-acf1-11e6-914a-33460bf1401d.png) **After** - Scala ![2016-11-17 6 16 44](https://cloud.githubusercontent.com/assets/6477701/20383167/09940490-acf2-11e6-937a-0d5e1dc2cadf.png) - Java ![2016-11-17 6 13 39](https://cloud.githubusercontent.com/assets/6477701/20383132/e7c2a57e-acf1-11e6-9c47-b849674d4d88.png) The notes were found via ```bash grep -r "NOTE: " . | \ # Note:|NOTE:|Note that|'''Note:''' grep -v "// NOTE: " | \ # starting with // does not appear in API documentation. grep -E '.scala|.java' | \ # java/scala files grep -v Suite | \ # exclude tests grep -v Test | \ # exclude tests grep -e 'org.apache.spark.api.java' \ # packages appear in API documenation -e 'org.apache.spark.api.java.function' \ # note that this is a regular expression. So actual matches were mostly `org/apache/spark/api/java/functions ...` -e 'org.apache.spark.api.r' \ ... ``` ```bash grep -r "Note that " . | \ # Note:|NOTE:|Note that|'''Note:''' grep -v "// Note that " | \ # starting with // does not appear in API documentation. grep -E '.scala|.java' | \ # java/scala files grep -v Suite | \ # exclude tests grep -v Test | \ # exclude tests grep -e 'org.apache.spark.api.java' \ # packages appear in API documenation -e 'org.apache.spark.api.java.function' \ -e 'org.apache.spark.api.r' \ ... ``` ```bash grep -r "Note: " . | \ # Note:|NOTE:|Note that|'''Note:''' grep -v "// Note: " | \ # starting with // does not appear in API documentation. grep -E '.scala|.java' | \ # java/scala files grep -v Suite | \ # exclude tests grep -v Test | \ # exclude tests grep -e 'org.apache.spark.api.java' \ # packages appear in API documenation -e 'org.apache.spark.api.java.function' \ -e 'org.apache.spark.api.r' \ ... ``` ```bash grep -r "'''Note:'''" . | \ # Note:|NOTE:|Note that|'''Note:''' grep -v "// '''Note:''' " | \ # starting with // does not appear in API documentation. grep -E '.scala|.java' | \ # java/scala files grep -v Suite | \ # exclude tests grep -v Test | \ # exclude tests grep -e 'org.apache.spark.api.java' \ # packages appear in API documenation -e 'org.apache.spark.api.java.function' \ -e 'org.apache.spark.api.r' \ ... ``` And then fixed one by one comparing with API documentation/access modifiers. After that, manually tested via `jekyll build`. Author: hyukjinkwon Closes #15889 from HyukjinKwon/SPARK-18437. 
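Looking back at the SPARK-18448 commit above: since SparkSession now extends java.io.Closeable, its lifetime can be managed by any construct that handles a Closeable, including Java's try-with-resources. A minimal Scala sketch of that usage, with a placeholder app name and workload that are not part of the patch:

```scala
import org.apache.spark.sql.SparkSession

object CloseableSessionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("closeable-demo")          // placeholder app name
      .getOrCreate()
    try {
      println(spark.range(100).count())   // placeholder workload
    } finally {
      spark.close()                       // synonym for stop(), added in this commit
    }
  }
}
```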
(cherry picked from commit d5b1d5fc80153571c308130833d0c0774de62c92) Signed-off-by: Sean Owen --- .../org/apache/spark/ContextCleaner.scala | 2 +- .../scala/org/apache/spark/Partitioner.scala | 2 +- .../scala/org/apache/spark/SparkConf.scala | 6 +- .../scala/org/apache/spark/SparkContext.scala | 47 ++++++++------- .../apache/spark/api/java/JavaDoubleRDD.scala | 4 +- .../apache/spark/api/java/JavaPairRDD.scala | 26 ++++---- .../org/apache/spark/api/java/JavaRDD.scala | 12 ++-- .../apache/spark/api/java/JavaRDDLike.scala | 3 +- .../spark/api/java/JavaSparkContext.scala | 21 +++---- .../api/java/JavaSparkStatusTracker.scala | 2 +- .../apache/spark/io/CompressionCodec.scala | 23 ++++--- .../apache/spark/partial/BoundedDouble.scala | 2 +- .../org/apache/spark/rdd/CoGroupedRDD.scala | 8 +-- .../apache/spark/rdd/DoubleRDDFunctions.scala | 2 +- .../org/apache/spark/rdd/HadoopRDD.scala | 6 +- .../org/apache/spark/rdd/NewHadoopRDD.scala | 6 +- .../apache/spark/rdd/PairRDDFunctions.scala | 23 +++---- .../spark/rdd/PartitionPruningRDD.scala | 2 +- .../spark/rdd/PartitionwiseSampledRDD.scala | 2 +- .../main/scala/org/apache/spark/rdd/RDD.scala | 46 +++++++------- .../apache/spark/rdd/RDDCheckpointData.scala | 2 +- .../spark/rdd/ReliableCheckpointRDD.scala | 2 +- .../spark/rdd/SequenceFileRDDFunctions.scala | 5 +- .../apache/spark/rdd/ZippedWithIndexRDD.scala | 2 +- .../spark/scheduler/AccumulableInfo.scala | 10 ++-- .../spark/serializer/JavaSerializer.scala | 2 +- .../spark/serializer/KryoSerializer.scala | 2 +- .../apache/spark/serializer/Serializer.scala | 2 +- .../apache/spark/storage/StorageUtils.scala | 19 +++--- .../org/apache/spark/util/AccumulatorV2.scala | 5 +- .../spark/scheduler/DAGSchedulerSuite.scala | 2 +- docs/mllib-isotonic-regression.md | 2 +- docs/streaming-programming-guide.md | 2 +- .../spark/sql/kafka010/KafkaSource.scala | 2 +- .../spark/streaming/kafka/KafkaUtils.scala | 8 +-- .../streaming/kinesis/KinesisUtils.scala | 60 +++++++++---------- .../kinesis/KinesisBackedBlockRDDSuite.scala | 2 +- .../apache/spark/graphx/impl/GraphImpl.scala | 2 +- .../apache/spark/graphx/lib/PageRank.scala | 2 +- .../org/apache/spark/ml/linalg/Vectors.scala | 2 +- .../scala/org/apache/spark/ml/Model.scala | 2 +- .../DecisionTreeClassifier.scala | 6 +- .../ml/classification/GBTClassifier.scala | 6 +- .../classification/LogisticRegression.scala | 36 +++++------ .../spark/ml/clustering/GaussianMixture.scala | 6 +- .../spark/ml/feature/MinMaxScaler.scala | 3 +- .../spark/ml/feature/OneHotEncoder.scala | 3 +- .../org/apache/spark/ml/feature/PCA.scala | 5 +- .../spark/ml/feature/StopWordsRemover.scala | 5 +- .../spark/ml/feature/StringIndexer.scala | 6 +- .../org/apache/spark/ml/param/params.scala | 2 +- .../ml/regression/DecisionTreeRegressor.scala | 6 +- .../GeneralizedLinearRegression.scala | 4 +- .../ml/regression/LinearRegression.scala | 28 +++++---- .../ml/source/libsvm/LibSVMDataSource.scala | 2 +- .../ml/tree/impl/GradientBoostedTrees.scala | 4 +- .../org/apache/spark/ml/util/ReadWrite.scala | 2 +- .../classification/LogisticRegression.scala | 28 +++++---- .../spark/mllib/classification/SVM.scala | 20 ++++--- .../mllib/clustering/GaussianMixture.scala | 8 +-- .../spark/mllib/clustering/KMeans.scala | 8 ++- .../apache/spark/mllib/clustering/LDA.scala | 4 +- .../spark/mllib/clustering/LDAModel.scala | 2 +- .../spark/mllib/clustering/LDAOptimizer.scala | 6 +- .../mllib/evaluation/AreaUnderCurve.scala | 2 +- .../apache/spark/mllib/linalg/Vectors.scala | 6 +- .../linalg/distributed/BlockMatrix.scala | 2 
+- .../linalg/distributed/IndexedRowMatrix.scala | 5 +- .../mllib/linalg/distributed/RowMatrix.scala | 21 ++++--- .../spark/mllib/optimization/Gradient.scala | 3 +- .../apache/spark/mllib/rdd/RDDFunctions.scala | 2 +- .../MatrixFactorizationModel.scala | 6 +- .../apache/spark/mllib/stat/Statistics.scala | 34 +++++------ .../spark/mllib/tree/DecisionTree.scala | 32 +++++----- .../apache/spark/mllib/tree/loss/Loss.scala | 12 ++-- .../mllib/tree/model/treeEnsembleModels.scala | 4 +- pom.xml | 7 +++ project/SparkBuild.scala | 3 +- python/pyspark/mllib/stat/KernelDensity.py | 2 +- python/pyspark/mllib/util.py | 2 +- python/pyspark/rdd.py | 4 +- python/pyspark/streaming/kafka.py | 4 +- .../scala/org/apache/spark/sql/Encoders.scala | 8 +-- .../sql/types/CalendarIntervalType.scala | 4 +- .../scala/org/apache/spark/sql/Column.scala | 2 +- .../spark/sql/DataFrameStatFunctions.scala | 3 +- .../apache/spark/sql/DataFrameWriter.scala | 2 +- .../scala/org/apache/spark/sql/Dataset.scala | 56 ++++++++--------- .../org/apache/spark/sql/SQLContext.scala | 7 ++- .../org/apache/spark/sql/SparkSession.scala | 9 +-- .../apache/spark/sql/UDFRegistration.scala | 3 +- .../execution/streaming/state/package.scala | 4 +- .../sql/expressions/UserDefinedFunction.scala | 8 ++- .../org/apache/spark/sql/functions.scala | 22 +++---- .../apache/spark/sql/jdbc/JdbcDialects.scala | 2 +- .../apache/spark/sql/sources/interfaces.scala | 10 ++-- .../sql/util/QueryExecutionListener.scala | 8 ++- .../columnar/InMemoryColumnarQuerySuite.scala | 2 +- .../spark/streaming/StreamingContext.scala | 18 +++--- .../streaming/api/java/JavaPairDStream.scala | 2 +- .../api/java/JavaStreamingContext.scala | 40 +++++++------ .../spark/streaming/dstream/DStream.scala | 4 +- .../dstream/MapWithStateDStream.scala | 2 +- .../WriteAheadLogBackedBlockRDDSuite.scala | 2 +- 104 files changed, 516 insertions(+), 435 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 5678d790e9e76..af913454fce69 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -139,7 +139,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { periodicGCService.shutdown() } - /** Register a RDD for cleanup when it is garbage collected. */ + /** Register an RDD for cleanup when it is garbage collected. */ def registerRDDForCleanup(rdd: RDD[_]): Unit = { registerForCleanup(rdd, CleanRDD(rdd.id)) } diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index 93dfbc0e6ed65..f83f5278e8b8f 100644 --- a/core/src/main/scala/org/apache/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -101,7 +101,7 @@ class HashPartitioner(partitions: Int) extends Partitioner { * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly * equal ranges. The ranges are determined by sampling the content of the RDD passed in. * - * Note that the actual number of partitions created by the RangePartitioner might not be the same + * @note The actual number of partitions created by the RangePartitioner might not be the same * as the `partitions` parameter, in the case where the number of sampled records is less than * the value of `partitions`. 
*/ diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index c9c342df82c97..04d657c09afd0 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -42,10 +42,10 @@ import org.apache.spark.util.Utils * All setter methods in this class support chaining. For example, you can write * `new SparkConf().setMaster("local").setAppName("My app")`. * - * Note that once a SparkConf object is passed to Spark, it is cloned and can no longer be modified - * by the user. Spark does not support modifying the configuration at runtime. - * * @param loadDefaults whether to also load values from Java system properties + * + * @note Once a SparkConf object is passed to Spark, it is cloned and can no longer be modified + * by the user. Spark does not support modifying the configuration at runtime. */ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Serializable { diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 25a3d609a6b09..1261e3e735761 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -281,7 +281,7 @@ class SparkContext(config: SparkConf) extends Logging { /** * A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. * - * '''Note:''' As it will be reused in all Hadoop RDDs, it's better not to modify it unless you + * @note As it will be reused in all Hadoop RDDs, it's better not to modify it unless you * plan to set some global configurations for all Hadoop RDDs. */ def hadoopConfiguration: Configuration = _hadoopConfiguration @@ -700,7 +700,7 @@ class SparkContext(config: SparkConf) extends Logging { * Execute a block of code in a scope such that all new RDDs created in this body will * be part of the same scope. For more detail, see {{org.apache.spark.rdd.RDDOperationScope}}. * - * Note: Return statements are NOT allowed in the given body. + * @note Return statements are NOT allowed in the given body. */ private[spark] def withScope[U](body: => U): U = RDDOperationScope.withScope[U](this)(body) @@ -927,7 +927,7 @@ class SparkContext(config: SparkConf) extends Logging { /** * Load data from a flat binary file, assuming the length of each record is constant. * - * '''Note:''' We ensure that the byte array for each record in the resulting RDD + * @note We ensure that the byte array for each record in the resulting RDD * has the provided record length. * * @param path Directory to the input data files, the path can be comma separated paths as the @@ -970,7 +970,7 @@ class SparkContext(config: SparkConf) extends Logging { * @param valueClass Class of the values * @param minPartitions Minimum number of Hadoop Splits to generate. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. 
* If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -995,7 +995,7 @@ class SparkContext(config: SparkConf) extends Logging { /** Get an RDD for a Hadoop file with an arbitrary InputFormat * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1034,7 +1034,7 @@ class SparkContext(config: SparkConf) extends Logging { * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path, minPartitions) * }}} * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1058,7 +1058,7 @@ class SparkContext(config: SparkConf) extends Logging { * val file = sparkContext.hadoopFile[LongWritable, Text, TextInputFormat](path) * }}} * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1084,7 +1084,7 @@ class SparkContext(config: SparkConf) extends Logging { * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat * and extra configuration options to pass to the input format. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1124,7 +1124,7 @@ class SparkContext(config: SparkConf) extends Logging { * @param kClass Class of the keys * @param vClass Class of the values * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1150,7 +1150,7 @@ class SparkContext(config: SparkConf) extends Logging { /** * Get an RDD for a Hadoop SequenceFile with given key and value types. 
* - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1169,7 +1169,7 @@ class SparkContext(config: SparkConf) extends Logging { /** * Get an RDD for a Hadoop SequenceFile with given key and value types. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1199,7 +1199,7 @@ class SparkContext(config: SparkConf) extends Logging { * for the appropriate type. In addition, we pass the converter a ClassTag of its type to * allow it to figure out the Writable class to use in the subclass case. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD or directly passing it to an aggregation or shuffle * operation will create many references to the same object. * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first @@ -1330,16 +1330,18 @@ class SparkContext(config: SparkConf) extends Logging { } /** - * Register the given accumulator. Note that accumulators must be registered before use, or it - * will throw exception. + * Register the given accumulator. + * + * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _]): Unit = { acc.register(this) } /** - * Register the given accumulator with given name. Note that accumulators must be registered - * before use, or it will throw exception. + * Register the given accumulator with given name. + * + * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { acc.register(this, name = Some(name)) @@ -1550,7 +1552,7 @@ class SparkContext(config: SparkConf) extends Logging { * :: DeveloperApi :: * Request that the cluster manager kill the specified executors. * - * Note: This is an indication to the cluster manager that the application wishes to adjust + * @note This is an indication to the cluster manager that the application wishes to adjust * its resource usage downwards. If the application wishes to replace the executors it kills * through this method with new ones, it should follow up explicitly with a call to * {{SparkContext#requestExecutors}}. @@ -1572,7 +1574,7 @@ class SparkContext(config: SparkConf) extends Logging { * :: DeveloperApi :: * Request that the cluster manager kill the specified executor. * - * Note: This is an indication to the cluster manager that the application wishes to adjust + * @note This is an indication to the cluster manager that the application wishes to adjust * its resource usage downwards. 
If the application wishes to replace the executor it kills * through this method with a new one, it should follow up explicitly with a call to * {{SparkContext#requestExecutors}}. @@ -1590,7 +1592,7 @@ class SparkContext(config: SparkConf) extends Logging { * this request. This assumes the cluster manager will automatically and eventually * fulfill all missing application resource requests. * - * Note: The replace is by no means guaranteed; another application on the same cluster + * @note The replace is by no means guaranteed; another application on the same cluster * can steal the window of opportunity and acquire this application's resources in the * mean time. * @@ -1639,7 +1641,8 @@ class SparkContext(config: SparkConf) extends Logging { /** * Returns an immutable map of RDDs that have marked themselves as persistent via cache() call. - * Note that this does not necessarily mean the caching or computation was successful. + * + * @note This does not necessarily mean the caching or computation was successful. */ def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap @@ -2298,7 +2301,7 @@ object SparkContext extends Logging { * singleton object. Because we can only have one active SparkContext per JVM, * this is useful when applications may wish to share a SparkContext. * - * Note: This function cannot be used to create multiple SparkContext instances + * @note This function cannot be used to create multiple SparkContext instances * even if multiple contexts are allowed. */ def getOrCreate(config: SparkConf): SparkContext = { @@ -2323,7 +2326,7 @@ object SparkContext extends Logging { * * This method allows not passing a SparkConf (useful if just retrieving). * - * Note: This function cannot be used to create multiple SparkContext instances + * @note This function cannot be used to create multiple SparkContext instances * even if multiple contexts are allowed. */ def getOrCreate(): SparkContext = { diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 0026fc9dad517..a32a4b28c1731 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -153,7 +153,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) * Return the intersection of this RDD and another one. The output will not contain any duplicate * elements, even if the input RDDs did. * - * Note that this method performs a shuffle internally. + * @note This method performs a shuffle internally. */ def intersection(other: JavaDoubleRDD): JavaDoubleRDD = fromRDD(srdd.intersection(other.srdd)) @@ -256,7 +256,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) * e.g 1<=x<10 , 10<=x<20, 20<=x<50 * And on the input of 1 and 50 we would have a histogram of 1,0,0 * - * Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched + * @note If your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched * from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets * to true. * buckets must be sorted and not contain any duplicates. 
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 1c95bc4bfcaaf..bff5a29bb60f1 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -206,7 +206,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Return the intersection of this RDD and another one. The output will not contain any duplicate * elements, even if the input RDDs did. * - * Note that this method performs a shuffle internally. + * @note This method performs a shuffle internally. */ def intersection(other: JavaPairRDD[K, V]): JavaPairRDD[K, V] = new JavaPairRDD[K, V](rdd.intersection(other.rdd)) @@ -223,9 +223,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Generic function to combine the elements for each key using a custom set of aggregation * functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a - * "combined type" C. Note that V and C can be different -- for example, one might group an - * RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three - * functions: + * "combined type" C. + * + * Users provide three functions: * * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list) * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list) @@ -234,6 +234,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * In addition, users can control the partitioning of the output RDD, the serializer that is use * for the shuffle, and whether to perform map-side aggregation (if a mapper can produce multiple * items with the same key). + * + * @note V and C can be different -- for example, one might group an RDD of type (Int, Int) into + * an RDD of type (Int, List[Int]). */ def combineByKey[C](createCombiner: JFunction[V, C], mergeValue: JFunction2[C, V, C], @@ -255,9 +258,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Generic function to combine the elements for each key using a custom set of aggregation * functions. Turns a JavaPairRDD[(K, V)] into a result of type JavaPairRDD[(K, C)], for a - * "combined type" C. Note that V and C can be different -- for example, one might group an - * RDD of type (Int, Int) into an RDD of type (Int, List[Int]). Users provide three - * functions: + * "combined type" C. + * + * Users provide three functions: * * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list) * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list) @@ -265,6 +268,9 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * * In addition, users can control the partitioning of the output RDD. This method automatically * uses map-side aggregation in shuffling the RDD. + * + * @note V and C can be different -- for example, one might group an RDD of type (Int, Int) into + * an RDD of type (Int, List[Int]). */ def combineByKey[C](createCombiner: JFunction[V, C], mergeValue: JFunction2[C, V, C], @@ -398,7 +404,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Group the values for each key in the RDD into a single sequence. Allows controlling the * partitioning of the resulting key-value pair RDD by passing a Partitioner. 
* - * Note: If you are grouping in order to perform an aggregation (such as a sum or average) over + * @note If you are grouping in order to perform an aggregation (such as a sum or average) over * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]] * will provide much better performance. */ @@ -409,7 +415,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Group the values for each key in the RDD into a single sequence. Hash-partitions the * resulting RDD with into `numPartitions` partitions. * - * Note: If you are grouping in order to perform an aggregation (such as a sum or average) over + * @note If you are grouping in order to perform an aggregation (such as a sum or average) over * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]] * will provide much better performance. */ @@ -539,7 +545,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Group the values for each key in the RDD into a single sequence. Hash-partitions the * resulting RDD with the existing partitioner/parallelism level. * - * Note: If you are grouping in order to perform an aggregation (such as a sum or average) over + * @note If you are grouping in order to perform an aggregation (such as a sum or average) over * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]] * will provide much better performance. */ diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index d67cff64e6e46..ccd94f876e0b8 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -99,27 +99,29 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) /** * Return a sampled subset of this RDD with a random seed. - * Note: this is NOT guaranteed to provide exactly the fraction of the count - * of the given [[RDD]]. * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * + * @note This is NOT guaranteed to provide exactly the fraction of the count + * of the given [[RDD]]. */ def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] = sample(withReplacement, fraction, Utils.random.nextLong) /** * Return a sampled subset of this RDD, with a user-supplied seed. - * Note: this is NOT guaranteed to provide exactly the fraction of the count - * of the given [[RDD]]. * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] * with replacement: expected number of times each element is chosen; fraction must be >= 0 * @param seed seed for the random number generator + * + * @note This is NOT guaranteed to provide exactly the fraction of the count + * of the given [[RDD]]. */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] = wrapRDD(rdd.sample(withReplacement, fraction, seed)) @@ -157,7 +159,7 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) * Return the intersection of this RDD and another one. 
The output will not contain any duplicate * elements, even if the input RDDs did. * - * Note that this method performs a shuffle internally. + * @note This method performs a shuffle internally. */ def intersection(other: JavaRDD[T]): JavaRDD[T] = wrapRDD(rdd.intersection(other.rdd)) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index a37c52cbaf210..eda16d957cc58 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -47,7 +47,8 @@ private[spark] abstract class AbstractJavaRDDLike[T, This <: JavaRDDLike[T, This /** * Defines operations common to several Java RDD implementations. - * Note that this trait is not intended to be implemented by user code. + * + * @note This trait is not intended to be implemented by user code. */ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def wrapRDD(rdd: RDD[T]): This diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 4e50c2686dd53..38d347aeab8c6 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -298,7 +298,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Get an RDD for a Hadoop SequenceFile with given key and value types. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -316,7 +316,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Get an RDD for a Hadoop SequenceFile. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -366,7 +366,7 @@ class JavaSparkContext(val sc: SparkContext) * @param valueClass Class of the values * @param minPartitions Minimum number of Hadoop Splits to generate. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -396,7 +396,7 @@ class JavaSparkContext(val sc: SparkContext) * @param keyClass Class of the keys * @param valueClass Class of the values * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. 
@@ -416,7 +416,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Get an RDD for a Hadoop file with an arbitrary InputFormat. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -437,7 +437,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Get an RDD for a Hadoop file with an arbitrary InputFormat * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -458,7 +458,7 @@ class JavaSparkContext(val sc: SparkContext) * Get an RDD for a given Hadoop file with an arbitrary new API InputFormat * and extra configuration options to pass to the input format. * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -487,7 +487,7 @@ class JavaSparkContext(val sc: SparkContext) * @param kClass Class of the keys * @param vClass Class of the values * - * '''Note:''' Because Hadoop's RecordReader class re-uses the same Writable object for each + * @note Because Hadoop's RecordReader class re-uses the same Writable object for each * record, directly caching the returned RDD will create many references to the same object. * If you plan to directly cache Hadoop writable objects, you should first copy them using * a `map` function. @@ -694,7 +694,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Returns the Hadoop configuration used for the Hadoop code (e.g. file systems) we reuse. * - * '''Note:''' As it will be reused in all Hadoop RDDs, it's better not to modify it unless you + * @note As it will be reused in all Hadoop RDDs, it's better not to modify it unless you * plan to set some global configurations for all Hadoop RDDs. */ def hadoopConfiguration(): Configuration = { @@ -811,7 +811,8 @@ class JavaSparkContext(val sc: SparkContext) /** * Returns a Java map of JavaRDDs that have marked themselves as persistent via cache() call. - * Note that this does not necessarily mean the caching or computation was successful. + * + * @note This does not necessarily mean the caching or computation was successful. 
*/ def getPersistentRDDs: JMap[java.lang.Integer, JavaRDD[_]] = { sc.getPersistentRDDs.mapValues(s => JavaRDD.fromRDD(s)) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala index 99ca3c77cced0..6aa290ecd7bb5 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkStatusTracker.scala @@ -31,7 +31,7 @@ import org.apache.spark.{SparkContext, SparkJobInfo, SparkStageInfo} * will provide information for the last `spark.ui.retainedStages` stages and * `spark.ui.retainedJobs` jobs. * - * NOTE: this class's constructor should be considered private and may be subject to change. + * @note This class's constructor should be considered private and may be subject to change. */ class JavaSparkStatusTracker private[spark] (sc: SparkContext) { diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index ae014becef755..6ba79e506a648 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -32,9 +32,8 @@ import org.apache.spark.util.Utils * CompressionCodec allows the customization of choosing different compression implementations * to be used in block storage. * - * Note: The wire protocol for a codec is not guaranteed compatible across versions of Spark. - * This is intended for use as an internal compression utility within a single - * Spark application. + * @note The wire protocol for a codec is not guaranteed compatible across versions of Spark. + * This is intended for use as an internal compression utility within a single Spark application. */ @DeveloperApi trait CompressionCodec { @@ -103,9 +102,9 @@ private[spark] object CompressionCodec { * LZ4 implementation of [[org.apache.spark.io.CompressionCodec]]. * Block size can be configured by `spark.io.compression.lz4.blockSize`. * - * Note: The wire protocol for this codec is not guaranteed to be compatible across versions - * of Spark. This is intended for use as an internal compression utility within a single Spark - * application. + * @note The wire protocol for this codec is not guaranteed to be compatible across versions + * of Spark. This is intended for use as an internal compression utility within a single Spark + * application. */ @DeveloperApi class LZ4CompressionCodec(conf: SparkConf) extends CompressionCodec { @@ -123,9 +122,9 @@ class LZ4CompressionCodec(conf: SparkConf) extends CompressionCodec { * :: DeveloperApi :: * LZF implementation of [[org.apache.spark.io.CompressionCodec]]. * - * Note: The wire protocol for this codec is not guaranteed to be compatible across versions - * of Spark. This is intended for use as an internal compression utility within a single Spark - * application. + * @note The wire protocol for this codec is not guaranteed to be compatible across versions + * of Spark. This is intended for use as an internal compression utility within a single Spark + * application. */ @DeveloperApi class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec { @@ -143,9 +142,9 @@ class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec { * Snappy implementation of [[org.apache.spark.io.CompressionCodec]]. * Block size can be configured by `spark.io.compression.snappy.blockSize`. 
* - * Note: The wire protocol for this codec is not guaranteed to be compatible across versions - * of Spark. This is intended for use as an internal compression utility within a single Spark - * application. + * @note The wire protocol for this codec is not guaranteed to be compatible across versions + * of Spark. This is intended for use as an internal compression utility within a single Spark + * application. */ @DeveloperApi class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec { diff --git a/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala index ab6aba6fc7d6a..8f579c5a3033c 100644 --- a/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala +++ b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala @@ -28,7 +28,7 @@ class BoundedDouble(val mean: Double, val confidence: Double, val low: Double, v this.mean.hashCode ^ this.confidence.hashCode ^ this.low.hashCode ^ this.high.hashCode /** - * Note that consistent with Double, any NaN value will make equality false + * @note Consistent with Double, any NaN value will make equality false */ override def equals(that: Any): Boolean = that match { diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index 2381f54ee3f06..a091f06b4ed7c 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -66,14 +66,14 @@ private[spark] class CoGroupPartition( /** * :: DeveloperApi :: - * A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a + * An RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a * tuple with the list of values for that key. * - * Note: This is an internal API. We recommend users use RDD.cogroup(...) instead of - * instantiating this directly. - * * @param rdds parent RDDs. * @param part partitioner used to partition the shuffle output + * + * @note This is an internal API. We recommend users use RDD.cogroup(...) instead of + * instantiating this directly. */ @DeveloperApi class CoGroupedRDD[K: ClassTag]( diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index a05a770b40c57..f3ab324d59119 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -158,7 +158,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * e.g 1<=x<10 , 10<=x<20, 20<=x<=50 * And on the input of 1 and 50 we would have a histogram of 1, 0, 1 * - * Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched + * @note If your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched * from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets * to true. * buckets must be sorted and not contain any duplicates. 
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 36a2f5c87e372..86351b8c575e5 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -84,9 +84,6 @@ private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: Inp * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS, * sources in HBase, or S3), using the older MapReduce API (`org.apache.hadoop.mapred`). * - * Note: Instantiating this class directly is not recommended, please use - * [[org.apache.spark.SparkContext.hadoopRDD()]] - * * @param sc The SparkContext to associate the RDD with. * @param broadcastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed * variable references an instance of JobConf, then that JobConf will be used for the Hadoop job. @@ -97,6 +94,9 @@ private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: Inp * @param keyClass Class of the key associated with the inputFormatClass. * @param valueClass Class of the value associated with the inputFormatClass. * @param minPartitions Minimum number of HadoopRDD partitions (Hadoop Splits) to generate. + * + * @note Instantiating this class directly is not recommended, please use + * [[org.apache.spark.SparkContext.hadoopRDD()]] */ @DeveloperApi class HadoopRDD[K, V]( diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 488e777fea371..a5965f597038d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -57,13 +57,13 @@ private[spark] class NewHadoopPartition( * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS, * sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`). * - * Note: Instantiating this class directly is not recommended, please use - * [[org.apache.spark.SparkContext.newAPIHadoopRDD()]] - * * @param sc The SparkContext to associate the RDD with. * @param inputFormatClass Storage format of the data to be read. * @param keyClass Class of the key associated with the inputFormatClass. * @param valueClass Class of the value associated with the inputFormatClass. + * + * @note Instantiating this class directly is not recommended, please use + * [[org.apache.spark.SparkContext.newAPIHadoopRDD()]] */ @DeveloperApi class NewHadoopRDD[K, V]( diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 67baad1c51bca..9ed0f3d8086a5 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -59,8 +59,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * :: Experimental :: * Generic function to combine the elements for each key using a custom set of aggregation * functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined type" C - * Note that V and C can be different -- for example, one might group an RDD of type - * (Int, Int) into an RDD of type (Int, Seq[Int]). 
Users provide three functions: + * + * Users provide three functions: * * - `createCombiner`, which turns a V into a C (e.g., creates a one-element list) * - `mergeValue`, to merge a V into a C (e.g., adds it to the end of a list) @@ -68,6 +68,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * * In addition, users can control the partitioning of the output RDD, and whether to perform * map-side aggregation (if a mapper can produce multiple items with the same key). + * + * @note V and C can be different -- for example, one might group an RDD of type + * (Int, Int) into an RDD of type (Int, Seq[Int]). */ @Experimental def combineByKeyWithClassTag[C]( @@ -363,7 +366,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) /** * Count the number of elements for each key, collecting the results to a local Map. * - * Note that this method should only be used if the resulting map is expected to be small, as + * @note This method should only be used if the resulting map is expected to be small, as * the whole thing is loaded into the driver's memory. * To handle very large results, consider using rdd.mapValues(_ => 1L).reduceByKey(_ + _), which * returns an RDD[T, Long] instead of a map. @@ -490,11 +493,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * The ordering of elements within each group is not guaranteed, and may even differ * each time the resulting RDD is evaluated. * - * Note: This operation may be very expensive. If you are grouping in order to perform an + * @note This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. * - * Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any + * @note As currently implemented, groupByKey must be able to hold all the key-value pairs for any * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. */ def groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = self.withScope { @@ -514,11 +517,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * resulting RDD with into `numPartitions` partitions. The ordering of elements within * each group is not guaranteed, and may even differ each time the resulting RDD is evaluated. * - * Note: This operation may be very expensive. If you are grouping in order to perform an + * @note This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. * - * Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any + * @note As currently implemented, groupByKey must be able to hold all the key-value pairs for any * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. */ def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])] = self.withScope { @@ -635,7 +638,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * within each group is not guaranteed, and may even differ each time the resulting RDD is * evaluated. * - * Note: This operation may be very expensive. If you are grouping in order to perform an + * @note This operation may be very expensive. 
If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ @@ -1016,7 +1019,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Output the RDD to any Hadoop-supported file system, using a Hadoop `OutputFormat` class * supporting the key and value types K and V in this RDD. * - * Note that, we should make sure our tasks are idempotent when speculation is enabled, i.e. do + * @note We should make sure our tasks are idempotent when speculation is enabled, i.e. do * not use output committer that writes data directly. * There is an example in https://issues.apache.org/jira/browse/SPARK-10063 to show the bad * result of using direct output committer with speculation enabled. @@ -1070,7 +1073,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * output paths required (e.g. a table name to write to) in the same way as it would be * configured for a Hadoop MapReduce job. * - * Note that, we should make sure our tasks are idempotent when speculation is enabled, i.e. do + * @note We should make sure our tasks are idempotent when speculation is enabled, i.e. do * not use output committer that writes data directly. * There is an example in https://issues.apache.org/jira/browse/SPARK-10063 to show the bad * result of using direct output committer with speculation enabled. diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala index 0c6ddda52cee9..ce75a16031a3f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala @@ -48,7 +48,7 @@ private[spark] class PruneDependency[T](rdd: RDD[T], partitionFilterFunc: Int => /** * :: DeveloperApi :: - * A RDD used to prune RDD partitions/partitions so we can avoid launching tasks on + * An RDD used to prune RDD partitions/partitions so we can avoid launching tasks on * all partitions. An example use case: If we know the RDD is partitioned by range, * and the execution DAG has a filter on the key, we can avoid launching tasks * on partitions that don't have the range covering the key. diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala index 3b1acacf409b9..6a89ea8786464 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala @@ -32,7 +32,7 @@ class PartitionwiseSampledRDDPartition(val prev: Partition, val seed: Long) } /** - * A RDD sampled from its parent RDD partition-wise. For each partition of the parent RDD, + * An RDD sampled from its parent RDD partition-wise. For each partition of the parent RDD, * a user-specified [[org.apache.spark.util.random.RandomSampler]] instance is used to obtain * a random sample of the records in the partition. The random seeds assigned to the samplers * are guaranteed to have different values. 
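
The PartitionPruningRDD described above is normally built through its companion object; a minimal sketch, assuming an existing SparkContext `sc`:

    import org.apache.spark.rdd.PartitionPruningRDD

    val data = sc.parallelize(1 to 100, numSlices = 10)

    // Keep only the first two partitions; no tasks are launched for the pruned ones
    val pruned = PartitionPruningRDD.create(data, partitionIndex => partitionIndex < 2)
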
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index cded899db1f5c..bff2b8f1d06c9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -428,7 +428,7 @@ abstract class RDD[T: ClassTag]( * current upstream partitions will be executed in parallel (per whatever * the current partitioning is). * - * Note: With shuffle = true, you can actually coalesce to a larger number + * @note With shuffle = true, you can actually coalesce to a larger number * of partitions. This is useful if you have a small number of partitions, * say 100, potentially with a few partitions being abnormally large. Calling * coalesce(1000, shuffle = true) will result in 1000 partitions with the @@ -466,14 +466,14 @@ abstract class RDD[T: ClassTag]( /** * Return a sampled subset of this RDD. * - * Note: this is NOT guaranteed to provide exactly the fraction of the count - * of the given [[RDD]]. - * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] * with replacement: expected number of times each element is chosen; fraction must be >= 0 * @param seed seed for the random number generator + * + * @note This is NOT guaranteed to provide exactly the fraction of the count + * of the given [[RDD]]. */ def sample( withReplacement: Boolean, @@ -537,13 +537,13 @@ abstract class RDD[T: ClassTag]( /** * Return a fixed-size sampled subset of this RDD in an array * - * @note this method should only be used if the resulting array is expected to be small, as - * all the data is loaded into the driver's memory. - * * @param withReplacement whether sampling is done with replacement * @param num size of the returned sample * @param seed seed for the random number generator * @return sample of specified size in an array + * + * @note this method should only be used if the resulting array is expected to be small, as + * all the data is loaded into the driver's memory. */ def takeSample( withReplacement: Boolean, @@ -618,7 +618,7 @@ abstract class RDD[T: ClassTag]( * Return the intersection of this RDD and another one. The output will not contain any duplicate * elements, even if the input RDDs did. * - * Note that this method performs a shuffle internally. + * @note This method performs a shuffle internally. */ def intersection(other: RDD[T]): RDD[T] = withScope { this.map(v => (v, null)).cogroup(other.map(v => (v, null))) @@ -630,7 +630,7 @@ abstract class RDD[T: ClassTag]( * Return the intersection of this RDD and another one. The output will not contain any duplicate * elements, even if the input RDDs did. * - * Note that this method performs a shuffle internally. + * @note This method performs a shuffle internally. * * @param partitioner Partitioner to use for the resulting RDD */ @@ -646,7 +646,7 @@ abstract class RDD[T: ClassTag]( * Return the intersection of this RDD and another one. The output will not contain any duplicate * elements, even if the input RDDs did. Performs a hash partition across the cluster * - * Note that this method performs a shuffle internally. + * @note This method performs a shuffle internally. * * @param numPartitions How many partitions to use in the resulting RDD */ @@ -674,7 +674,7 @@ abstract class RDD[T: ClassTag]( * mapping to that key. 
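
A short sketch of the coalesce and sample behavior noted above (assuming an existing SparkContext `sc`):

    val rdd = sc.parallelize(1 to 1000, numSlices = 100)

    // Without shuffle, coalesce can only reduce the number of partitions
    val fewer = rdd.coalesce(10)

    // With shuffle = true it can also increase it (here 100 -> 1000 partitions)
    val more = rdd.coalesce(1000, shuffle = true)

    // sample() is approximate: roughly 10% of the elements, not exactly 100
    val sampled = rdd.sample(withReplacement = false, fraction = 0.1, seed = 42L)
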
The ordering of elements within each group is not guaranteed, and * may even differ each time the resulting RDD is evaluated. * - * Note: This operation may be very expensive. If you are grouping in order to perform an + * @note This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ @@ -687,7 +687,7 @@ abstract class RDD[T: ClassTag]( * mapping to that key. The ordering of elements within each group is not guaranteed, and * may even differ each time the resulting RDD is evaluated. * - * Note: This operation may be very expensive. If you are grouping in order to perform an + * @note This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ @@ -702,7 +702,7 @@ abstract class RDD[T: ClassTag]( * mapping to that key. The ordering of elements within each group is not guaranteed, and * may even differ each time the resulting RDD is evaluated. * - * Note: This operation may be very expensive. If you are grouping in order to perform an + * @note This operation may be very expensive. If you are grouping in order to perform an * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ @@ -921,7 +921,7 @@ abstract class RDD[T: ClassTag]( /** * Return an array that contains all of the elements in this RDD. * - * @note this method should only be used if the resulting array is expected to be small, as + * @note This method should only be used if the resulting array is expected to be small, as * all the data is loaded into the driver's memory. */ def collect(): Array[T] = withScope { @@ -934,7 +934,7 @@ abstract class RDD[T: ClassTag]( * * The iterator will consume as much memory as the largest partition in this RDD. * - * Note: this results in multiple Spark jobs, and if the input RDD is the result + * @note This results in multiple Spark jobs, and if the input RDD is the result * of a wide transformation (e.g. join with different partitioners), to avoid * recomputing the input RDD should be cached first. */ @@ -1182,7 +1182,7 @@ abstract class RDD[T: ClassTag]( /** * Return the count of each unique value in this RDD as a local map of (value, count) pairs. * - * Note that this method should only be used if the resulting map is expected to be small, as + * @note This method should only be used if the resulting map is expected to be small, as * the whole thing is loaded into the driver's memory. * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which * returns an RDD[T, Long] instead of a map. @@ -1272,7 +1272,7 @@ abstract class RDD[T: ClassTag]( * This is similar to Scala's zipWithIndex but it uses Long instead of Int as the index type. * This method needs to trigger a spark job when this RDD contains more than one partitions. * - * Note that some RDDs, such as those returned by groupBy(), do not guarantee order of + * @note Some RDDs, such as those returned by groupBy(), do not guarantee order of * elements in a partition. The index assigned to each element is therefore not guaranteed, * and may even change if the RDD is reevaluated. 
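
To make the driver-memory and ordering caveats above concrete, a minimal sketch (assuming an existing SparkContext `sc`):

    val big = sc.parallelize(1 to 1000000)

    // collect() loads everything into driver memory at once; only for small results.
    // toLocalIterator() streams one partition at a time instead.
    big.toLocalIterator.take(5).foreach(println)

    // For deterministic indices, fix an ordering first, then zip
    val indexed = big.sortBy(identity).zipWithIndex()
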
If a fixed ordering is required to guarantee * the same index assignments, you should sort the RDD with sortByKey() or save it to a file. @@ -1286,7 +1286,7 @@ abstract class RDD[T: ClassTag]( * 2*n+k, ..., where n is the number of partitions. So there may exist gaps, but this method * won't trigger a spark job, which is different from [[org.apache.spark.rdd.RDD#zipWithIndex]]. * - * Note that some RDDs, such as those returned by groupBy(), do not guarantee order of + * @note Some RDDs, such as those returned by groupBy(), do not guarantee order of * elements in a partition. The unique ID assigned to each element is therefore not guaranteed, * and may even change if the RDD is reevaluated. If a fixed ordering is required to guarantee * the same index assignments, you should sort the RDD with sortByKey() or save it to a file. @@ -1305,10 +1305,10 @@ abstract class RDD[T: ClassTag]( * results from that partition to estimate the number of additional partitions needed to satisfy * the limit. * - * @note this method should only be used if the resulting array is expected to be small, as + * @note This method should only be used if the resulting array is expected to be small, as * all the data is loaded into the driver's memory. * - * @note due to complications in the internal implementation, this method will raise + * @note Due to complications in the internal implementation, this method will raise * an exception if called on an RDD of `Nothing` or `Null`. */ def take(num: Int): Array[T] = withScope { @@ -1370,7 +1370,7 @@ abstract class RDD[T: ClassTag]( * // returns Array(6, 5) * }}} * - * @note this method should only be used if the resulting array is expected to be small, as + * @note This method should only be used if the resulting array is expected to be small, as * all the data is loaded into the driver's memory. * * @param num k, the number of top elements to return @@ -1393,7 +1393,7 @@ abstract class RDD[T: ClassTag]( * // returns Array(2, 3) * }}} * - * @note this method should only be used if the resulting array is expected to be small, as + * @note This method should only be used if the resulting array is expected to be small, as * all the data is loaded into the driver's memory. * * @param num k, the number of elements to return @@ -1438,7 +1438,7 @@ abstract class RDD[T: ClassTag]( } /** - * @note due to complications in the internal implementation, this method will raise an + * @note Due to complications in the internal implementation, this method will raise an * exception if called on an RDD of `Nothing` or `Null`. This may be come up in practice * because, for example, the type of `parallelize(Seq())` is `RDD[Nothing]`. * (`parallelize(Seq())` should be avoided anyway in favor of `parallelize(Seq[T]())`.) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala index 429514b4f6bee..1070bb96b2524 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala @@ -32,7 +32,7 @@ private[spark] object CheckpointState extends Enumeration { /** * This class contains all the information related to RDD checkpointing. Each instance of this - * class is associated with a RDD. It manages process of checkpointing of the associated RDD, + * class is associated with an RDD. 
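
For the take/top/takeOrdered notes above, a small sketch (assuming an existing SparkContext `sc`):

    val nums = sc.parallelize(Seq(10, 4, 2, 12, 3))

    nums.top(2)          // Array(12, 10): the largest elements
    nums.takeOrdered(2)  // Array(2, 3): the smallest elements

    // Give empty collections an element type so the RDD is not RDD[Nothing]
    val empty = sc.parallelize(Seq[Int]())
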
It manages process of checkpointing of the associated RDD, * as well as, manages the post-checkpoint state by providing the updated partitions, * iterator and preferred locations of the checkpointed RDD. */ diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index eac901d10067c..7f399ecf81a08 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -151,7 +151,7 @@ private[spark] object ReliableCheckpointRDD extends Logging { } /** - * Write a RDD partition's data to a checkpoint file. + * Write an RDD partition's data to a checkpoint file. */ def writePartitionToCheckpointFile[T: ClassTag]( path: String, diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 1311b481c7c71..86a332790fb00 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -27,9 +27,10 @@ import org.apache.spark.internal.Logging /** * Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile, - * through an implicit conversion. Note that this can't be part of PairRDDFunctions because - * we need more implicit parameters to convert our keys and values to Writable. + * through an implicit conversion. * + * @note This can't be part of PairRDDFunctions because we need more implicit parameters to + * convert our keys and values to Writable. */ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( self: RDD[(K, V)], diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala index b0e5ba0865c63..8425b211d6ecf 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedWithIndexRDD.scala @@ -29,7 +29,7 @@ class ZippedWithIndexRDDPartition(val prev: Partition, val startIndex: Long) } /** - * Represents a RDD zipped with its element indices. The ordering is first based on the partition + * Represents an RDD zipped with its element indices. The ordering is first based on the partition * index and then the ordering of items within each partition. So the first item in the first * partition gets index 0, and the last item in the last partition receives the largest index. * diff --git a/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala index cedacad44afec..0a5fe5a1d3ee1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/AccumulableInfo.scala @@ -24,11 +24,6 @@ import org.apache.spark.annotation.DeveloperApi * :: DeveloperApi :: * Information about an [[org.apache.spark.Accumulable]] modified during a task or stage. * - * Note: once this is JSON serialized the types of `update` and `value` will be lost and be - * cast to strings. This is because the user can define an accumulator of any type and it will - * be difficult to preserve the type in consumers of the event log. This does not apply to - * internal accumulators that represent task level metrics. 
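
Checkpointing as managed by RDDCheckpointData and ReliableCheckpointRDD is driven from user code roughly as follows; a sketch in which the checkpoint directory is a made-up path and `sc` is an existing SparkContext:

    sc.setCheckpointDir("/tmp/spark-checkpoints")   // hypothetical path; use a reliable store in production

    val rdd = sc.parallelize(1 to 100).map(_ * 2)
    rdd.checkpoint()   // marks the RDD; files are written when an action runs
    rdd.count()        // triggers the job and materializes the checkpoint
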
- * * @param id accumulator ID * @param name accumulator name * @param update partial value from a task, may be None if used on driver to describe a stage @@ -36,6 +31,11 @@ import org.apache.spark.annotation.DeveloperApi * @param internal whether this accumulator was internal * @param countFailedValues whether to count this accumulator's partial value if the task failed * @param metadata internal metadata associated with this accumulator, if any + * + * @note Once this is JSON serialized the types of `update` and `value` will be lost and be + * cast to strings. This is because the user can define an accumulator of any type and it will + * be difficult to preserve the type in consumers of the event log. This does not apply to + * internal accumulators that represent task level metrics. */ @DeveloperApi case class AccumulableInfo private[spark] ( diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index 8b72da2ee01b7..f60dcfddfdc20 100644 --- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -131,7 +131,7 @@ private[spark] class JavaSerializerInstance( * :: DeveloperApi :: * A Spark serializer that uses Java's built-in serialization. * - * Note that this serializer is not guaranteed to be wire-compatible across different versions of + * @note This serializer is not guaranteed to be wire-compatible across different versions of * Spark. It is intended to be used to serialize/de-serialize data within a single * Spark application. */ diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 0d26281fe1076..19e020c968a9a 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.collection.CompactBuffer /** * A Spark serializer that uses the [[https://code.google.com/p/kryo/ Kryo serialization library]]. * - * Note that this serializer is not guaranteed to be wire-compatible across different versions of + * @note This serializer is not guaranteed to be wire-compatible across different versions of * Spark. It is intended to be used to serialize/de-serialize data within a single * Spark application. */ diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index cb95246d5b0ca..afe6cd86059f0 100644 --- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -40,7 +40,7 @@ import org.apache.spark.util.NextIterator * * 2. Java serialization interface. * - * Note that serializers are not required to be wire-compatible across different versions of Spark. + * @note Serializers are not required to be wire-compatible across different versions of Spark. * They are intended to be used to serialize/de-serialize data within a single Spark application. 
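
Since serializers are only used within a single application, Kryo is opted into per application through SparkConf; a sketch in which the application name and the registered class are illustrative:

    import org.apache.spark.SparkConf

    case class Point(x: Double, y: Double)   // illustrative user class

    val conf = new SparkConf()
      .setAppName("kryo-example")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[Point]))
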
*/ @DeveloperApi diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index fb9941bbd9e0f..e12f2e6095d5a 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -71,7 +71,7 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) { /** * Return the blocks stored in this block manager. * - * Note that this is somewhat expensive, as it involves cloning the underlying maps and then + * @note This is somewhat expensive, as it involves cloning the underlying maps and then * concatenating them together. Much faster alternatives exist for common operations such as * contains, get, and size. */ @@ -80,7 +80,7 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) { /** * Return the RDD blocks stored in this block manager. * - * Note that this is somewhat expensive, as it involves cloning the underlying maps and then + * @note This is somewhat expensive, as it involves cloning the underlying maps and then * concatenating them together. Much faster alternatives exist for common operations such as * getting the memory, disk, and off-heap memory sizes occupied by this RDD. */ @@ -128,7 +128,8 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) { /** * Return whether the given block is stored in this block manager in O(1) time. - * Note that this is much faster than `this.blocks.contains`, which is O(blocks) time. + * + * @note This is much faster than `this.blocks.contains`, which is O(blocks) time. */ def containsBlock(blockId: BlockId): Boolean = { blockId match { @@ -141,7 +142,8 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) { /** * Return the given block stored in this block manager in O(1) time. - * Note that this is much faster than `this.blocks.get`, which is O(blocks) time. + * + * @note This is much faster than `this.blocks.get`, which is O(blocks) time. */ def getBlock(blockId: BlockId): Option[BlockStatus] = { blockId match { @@ -154,19 +156,22 @@ class StorageStatus(val blockManagerId: BlockManagerId, val maxMem: Long) { /** * Return the number of blocks stored in this block manager in O(RDDs) time. - * Note that this is much faster than `this.blocks.size`, which is O(blocks) time. + * + * @note This is much faster than `this.blocks.size`, which is O(blocks) time. */ def numBlocks: Int = _nonRddBlocks.size + numRddBlocks /** * Return the number of RDD blocks stored in this block manager in O(RDDs) time. - * Note that this is much faster than `this.rddBlocks.size`, which is O(RDD blocks) time. + * + * @note This is much faster than `this.rddBlocks.size`, which is O(RDD blocks) time. */ def numRddBlocks: Int = _rddBlocks.values.map(_.size).sum /** * Return the number of blocks that belong to the given RDD in O(1) time. - * Note that this is much faster than `this.rddBlocksById(rddId).size`, which is + * + * @note This is much faster than `this.rddBlocksById(rddId).size`, which is * O(blocks in this RDD) time. 
*/ def numRddBlocksById(rddId: Int): Int = _rddBlocks.get(rddId).map(_.size).getOrElse(0) diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala index d3ddd39131326..1326f0977c241 100644 --- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala +++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala @@ -59,8 +59,9 @@ abstract class AccumulatorV2[IN, OUT] extends Serializable { } /** - * Returns true if this accumulator has been registered. Note that all accumulators must be - * registered before use, or it will throw exception. + * Returns true if this accumulator has been registered. + * + * @note All accumulators must be registered before use, or it will throw exception. */ final def isRegistered: Boolean = metadata != null && AccumulatorContext.get(metadata.id).isDefined diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index bec95d13d193a..5e8a854e46a0f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -2076,7 +2076,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou } /** - * Checks the DAGScheduler's internal logic for traversing a RDD DAG by making sure that + * Checks the DAGScheduler's internal logic for traversing an RDD DAG by making sure that * getShuffleDependencies correctly returns the direct shuffle dependencies of a particular * RDD. The test creates the following RDD graph (where n denotes a narrow dependency and s * denotes a shuffle dependency): diff --git a/docs/mllib-isotonic-regression.md b/docs/mllib-isotonic-regression.md index d90905a86ade9..ca84551506b2b 100644 --- a/docs/mllib-isotonic-regression.md +++ b/docs/mllib-isotonic-regression.md @@ -27,7 +27,7 @@ best fitting the original data points. [pool adjacent violators algorithm](http://doi.org/10.1198/TECH.2010.10111) which uses an approach to [parallelizing isotonic regression](http://doi.org/10.1007/978-3-642-99789-1_10). -The training input is a RDD of tuples of three double values that represent +The training input is an RDD of tuples of three double values that represent label, feature and weight in this order. Additionally IsotonicRegression algorithm has one optional parameter called $isotonic$ defaulting to true. This argument specifies if the isotonic regression is diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 0b0315b366501..18fc1cd934826 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -2191,7 +2191,7 @@ consistent batch processing times. Make sure you set the CMS GC on both the driv - When data is received from a stream source, receiver creates blocks of data. A new block of data is generated every blockInterval milliseconds. N blocks of data are created during the batchInterval where N = batchInterval/blockInterval. These blocks are distributed by the BlockManager of the current executor to the block managers of other executors. After that, the Network Input Tracker running on the driver is informed about the block locations for further processing. -- A RDD is created on the driver for the blocks created during the batchInterval. The blocks generated during the batchInterval are partitions of the RDD. Each partition is a task in spark. 
blockInterval== batchinterval would mean that a single partition is created and probably it is processed locally. +- An RDD is created on the driver for the blocks created during the batchInterval. The blocks generated during the batchInterval are partitions of the RDD. Each partition is a task in spark. blockInterval== batchinterval would mean that a single partition is created and probably it is processed locally. - The map tasks on the blocks are processed in the executors (one that received the block, and another where the block was replicated) that has the blocks irrespective of block interval, unless non-local scheduling kicks in. Having bigger blockinterval means bigger blocks. A high value of `spark.locality.wait` increases the chance of processing a block on the local node. A balance needs to be found out between these two parameters to ensure that the bigger blocks are processed locally. diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 5bcc5124b0915..341081a338c0e 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -279,7 +279,7 @@ private[kafka010] case class KafkaSource( } }.toArray - // Create a RDD that reads from Kafka and get the (key, value) pair as byte arrays. + // Create an RDD that reads from Kafka and get the (key, value) pair as byte arrays. val rdd = new KafkaSourceRDD( sc, executorKafkaParams, offsetRanges, pollTimeoutMs).map { cr => Row(cr.key, cr.value, cr.topic, cr.partition, cr.offset, cr.timestamp, cr.timestampType.id) diff --git a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala index b17e198077949..56f0cb0b166a2 100644 --- a/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala +++ b/external/kafka-0-8/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala @@ -223,7 +223,7 @@ object KafkaUtils { } /** - * Create a RDD from Kafka using offset ranges for each topic and partition. + * Create an RDD from Kafka using offset ranges for each topic and partition. * * @param sc SparkContext object * @param kafkaParams Kafka @@ -255,7 +255,7 @@ object KafkaUtils { } /** - * Create a RDD from Kafka using offset ranges for each topic and partition. This allows you + * Create an RDD from Kafka using offset ranges for each topic and partition. This allows you * specify the Kafka leader to connect to (to optimize fetching) and access the message as well * as the metadata. * @@ -303,7 +303,7 @@ object KafkaUtils { } /** - * Create a RDD from Kafka using offset ranges for each topic and partition. + * Create an RDD from Kafka using offset ranges for each topic and partition. * * @param jsc JavaSparkContext object * @param kafkaParams Kafka @@ -340,7 +340,7 @@ object KafkaUtils { } /** - * Create a RDD from Kafka using offset ranges for each topic and partition. This allows you + * Create an RDD from Kafka using offset ranges for each topic and partition. This allows you * specify the Kafka leader to connect to (to optimize fetching) and access the message as well * as the metadata. 
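
A minimal sketch of the offset-range based createRDD described above, for the kafka-0-8 connector (assuming an existing SparkContext `sc`; the broker address, topic and offsets are illustrative):

    import kafka.serializer.StringDecoder
    import org.apache.spark.streaming.kafka.{KafkaUtils, OffsetRange}

    val kafkaParams = Map("metadata.broker.list" -> "broker1:9092")
    val offsetRanges = Array(OffsetRange.create("topic-a", 0, 0L, 100L))

    // Batch read of exactly the given offsets as an RDD of (key, value) pairs
    val rdd = KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder](
      sc, kafkaParams, offsetRanges)
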
* diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index a0007d33d6257..b2daffa34ccbf 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -33,10 +33,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. - * * @param ssc StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -57,6 +53,10 @@ object KinesisUtils { * StorageLevel.MEMORY_AND_DISK_2 is recommended. * @param messageHandler A custom message handler that can generate a generic output from a * Kinesis `Record`, which contains both message data, and metadata. + * + * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. */ def createStream[T: ClassTag]( ssc: StreamingContext, @@ -81,10 +81,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: - * The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - * * @param ssc StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -107,6 +103,9 @@ object KinesisUtils { * Kinesis `Record`, which contains both message data, and metadata. * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) + * + * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. */ // scalastyle:off def createStream[T: ClassTag]( @@ -134,10 +133,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. - * * @param ssc StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -156,6 +151,10 @@ object KinesisUtils { * details on the different types of checkpoints. * @param storageLevel Storage level to use for storing the received objects. * StorageLevel.MEMORY_AND_DISK_2 is recommended. + * + * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. 
*/ def createStream( ssc: StreamingContext, @@ -178,10 +177,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: - * The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - * * @param ssc StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -202,6 +197,9 @@ object KinesisUtils { * StorageLevel.MEMORY_AND_DISK_2 is recommended. * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) + * + * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. */ def createStream( ssc: StreamingContext, @@ -225,10 +223,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. - * * @param jssc Java StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -250,6 +244,10 @@ object KinesisUtils { * @param messageHandler A custom message handler that can generate a generic output from a * Kinesis `Record`, which contains both message data, and metadata. * @param recordClass Class of the records in DStream + * + * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. */ def createStream[T]( jssc: JavaStreamingContext, @@ -272,10 +270,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: - * The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - * * @param jssc Java StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -299,6 +293,9 @@ object KinesisUtils { * @param recordClass Class of the records in DStream * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) + * + * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. */ // scalastyle:off def createStream[T]( @@ -326,10 +323,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. 
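
A sketch of the basic createStream overload documented here, relying on the DefaultAWSCredentialsProviderChain rather than explicit keys (assuming an existing StreamingContext `ssc`; the application name, stream, endpoint and region are illustrative):

    import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
    import org.apache.spark.storage.StorageLevel
    import org.apache.spark.streaming.Seconds
    import org.apache.spark.streaming.kinesis.KinesisUtils

    val byteStream = KinesisUtils.createStream(
      ssc,
      "my-kinesis-app",                           // KCL application name (illustrative)
      "my-stream",                                // Kinesis stream name (illustrative)
      "https://kinesis.us-east-1.amazonaws.com",  // endpoint URL (illustrative)
      "us-east-1",                                // region name
      InitialPositionInStream.LATEST,
      Seconds(10),                                // KCL checkpoint interval
      StorageLevel.MEMORY_AND_DISK_2)
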
- * * @param jssc Java StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -348,6 +341,10 @@ object KinesisUtils { * details on the different types of checkpoints. * @param storageLevel Storage level to use for storing the received objects. * StorageLevel.MEMORY_AND_DISK_2 is recommended. + * + * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. */ def createStream( jssc: JavaStreamingContext, @@ -367,10 +364,6 @@ object KinesisUtils { * Create an input stream that pulls messages from a Kinesis stream. * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * Note: - * The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - * * @param jssc Java StreamingContext object * @param kinesisAppName Kinesis application name used by the Kinesis Client Library * (KCL) to update DynamoDB @@ -391,6 +384,9 @@ object KinesisUtils { * StorageLevel.MEMORY_AND_DISK_2 is recommended. * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) + * + * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. */ def createStream( jssc: JavaStreamingContext, diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala index 905c33834df16..a4d81a680979e 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala @@ -221,7 +221,7 @@ abstract class KinesisBackedBlockRDDTests(aggregateTestData: Boolean) assert(collectedData.toSet === testData.toSet) // Verify that the block fetching is skipped when isBlockValid is set to false. - // This is done by using a RDD whose data is only in memory but is set to skip block fetching + // This is done by using an RDD whose data is only in memory but is set to skip block fetching // Using that RDD will throw exception, as it skips block fetching even if the blocks are in // in BlockManager. if (testIsBlockValid) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index e18831382d4d5..3810110099993 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -42,7 +42,7 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( @transient override val edges: EdgeRDDImpl[ED, VD] = replicatedVertexView.edges - /** Return a RDD that brings edges together with their source and destination vertices. */ + /** Return an RDD that brings edges together with their source and destination vertices. 
*/ @transient override lazy val triplets: RDD[EdgeTriplet[VD, ED]] = { replicatedVertexView.upgrade(vertices, true, true) replicatedVertexView.edges.partitionsRDD.mapPartitions(_.flatMap { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index c0c3c73463aab..f926984aa6335 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -58,7 +58,7 @@ import org.apache.spark.ml.linalg.{Vector, Vectors} * `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of * neighbors which link to `i` and `outDeg[j]` is the out degree of vertex `j`. * - * Note that this is not the "normalized" PageRank and as a consequence pages that have no + * @note This is not the "normalized" PageRank and as a consequence pages that have no * inlinks will have a PageRank of alpha. */ object PageRank extends Logging { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 2e4a58dc6291c..22e4ec693b1f7 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -30,7 +30,7 @@ import org.apache.spark.annotation.Since /** * Represents a numeric vector, whose index type is Int and value type is Double. * - * Note: Users should not implement this interface. + * @note Users should not implement this interface. */ @Since("2.0.0") sealed trait Vector extends Serializable { diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala index 252acc156583f..c581fed177273 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Model.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Model.scala @@ -30,7 +30,7 @@ import org.apache.spark.ml.param.ParamMap abstract class Model[M <: Model[M]] extends Transformer { /** * The parent estimator that produced this model. - * Note: For ensembles' component Models, this value can be null. + * @note For ensembles' component Models, this value can be null. */ @transient var parent: Estimator[M] = _ diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index bb192ab5f25ab..7424031ed4608 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -207,9 +207,9 @@ class DecisionTreeClassificationModel private[ml] ( * where gain is scaled by the number of instances passing through node * - Normalize importances for tree to sum to 1. * - * Note: Feature importance for single decision trees can have high variance due to - * correlated predictor variables. Consider using a [[RandomForestClassifier]] - * to determine feature importance instead. + * @note Feature importance for single decision trees can have high variance due to + * correlated predictor variables. Consider using a [[RandomForestClassifier]] + * to determine feature importance instead. 
*/ @Since("2.0.0") lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(this, numFeatures) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index f8f164e8c14bd..52f93f5a6b345 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -43,7 +43,6 @@ import org.apache.spark.sql.types.DoubleType * Gradient-Boosted Trees (GBTs) (http://en.wikipedia.org/wiki/Gradient_boosting) * learning algorithm for classification. * It supports binary labels, as well as both continuous and categorical features. - * Note: Multiclass labels are not currently supported. * * The implementation is based upon: J.H. Friedman. "Stochastic Gradient Boosting." 1999. * @@ -54,6 +53,8 @@ import org.apache.spark.sql.types.DoubleType * based on the loss function, whereas the original gradient boosting method does not. * - We expect to implement TreeBoost in the future: * [https://issues.apache.org/jira/browse/SPARK-4240] + * + * @note Multiclass labels are not currently supported. */ @Since("1.4.0") class GBTClassifier @Since("1.4.0") ( @@ -169,10 +170,11 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] { * Gradient-Boosted Trees (GBTs) (http://en.wikipedia.org/wiki/Gradient_boosting) * model for classification. * It supports binary labels, as well as both continuous and categorical features. - * Note: Multiclass labels are not currently supported. * * @param _trees Decision trees in the ensemble. * @param _treeWeights Weights for the decision trees in the ensemble. + * + * @note Multiclass labels are not currently supported. */ @Since("1.6.0") class GBTClassificationModel private[ml]( diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 18b9b3043db8a..71a7fe53c15f8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1191,8 +1191,8 @@ class BinaryLogisticRegressionSummary private[classification] ( * with (0.0, 0.0) prepended and (1.0, 1.0) appended to it. * See http://en.wikipedia.org/wiki/Receiver_operating_characteristic * - * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. + * This will change in later Spark versions. */ @Since("1.5.0") @transient lazy val roc: DataFrame = binaryMetrics.roc().toDF("FPR", "TPR") @@ -1200,8 +1200,8 @@ class BinaryLogisticRegressionSummary private[classification] ( /** * Computes the area under the receiver operating characteristic (ROC) curve. * - * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. + * This will change in later Spark versions. 
*/ @Since("1.5.0") lazy val areaUnderROC: Double = binaryMetrics.areaUnderROC() @@ -1210,8 +1210,8 @@ class BinaryLogisticRegressionSummary private[classification] ( * Returns the precision-recall curve, which is a Dataframe containing * two fields recall, precision with (0.0, 1.0) prepended to it. * - * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. + * This will change in later Spark versions. */ @Since("1.5.0") @transient lazy val pr: DataFrame = binaryMetrics.pr().toDF("recall", "precision") @@ -1219,8 +1219,8 @@ class BinaryLogisticRegressionSummary private[classification] ( /** * Returns a dataframe with two fields (threshold, F-Measure) curve with beta = 1.0. * - * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. + * This will change in later Spark versions. */ @Since("1.5.0") @transient lazy val fMeasureByThreshold: DataFrame = { @@ -1232,8 +1232,8 @@ class BinaryLogisticRegressionSummary private[classification] ( * Every possible probability obtained in transforming the dataset are used * as thresholds used in calculating the precision. * - * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. + * This will change in later Spark versions. */ @Since("1.5.0") @transient lazy val precisionByThreshold: DataFrame = { @@ -1245,8 +1245,8 @@ class BinaryLogisticRegressionSummary private[classification] ( * Every possible probability obtained in transforming the dataset are used * as thresholds used in calculating the recall. * - * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`. + * This will change in later Spark versions. */ @Since("1.5.0") @transient lazy val recallByThreshold: DataFrame = { @@ -1401,18 +1401,18 @@ class BinaryLogisticRegressionSummary private[classification] ( * $$ *
* - * @note In order to avoid unnecessary computation during calculation of the gradient updates - * we lay out the coefficients in column major order during training. This allows us to - * perform feature standardization once, while still retaining sequential memory access - * for speed. We convert back to row major order when we create the model, - * since this form is optimal for the matrix operations used for prediction. - * * @param bcCoefficients The broadcast coefficients corresponding to the features. * @param bcFeaturesStd The broadcast standard deviation values of the features. * @param numClasses the number of possible outcomes for k classes classification problem in * Multinomial Logistic Regression. * @param fitIntercept Whether to fit an intercept term. * @param multinomial Whether to use multinomial (softmax) or binary loss + * + * @note In order to avoid unnecessary computation during calculation of the gradient updates + * we lay out the coefficients in column major order during training. This allows us to + * perform feature standardization once, while still retaining sequential memory access + * for speed. We convert back to row major order when we create the model, + * since this form is optimal for the matrix operations used for prediction. */ private class LogisticAggregator( bcCoefficients: Broadcast[Vector], diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index a0bd66e731a1d..c6035cc4c9647 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -268,9 +268,9 @@ object GaussianMixtureModel extends MLReadable[GaussianMixtureModel] { * While this process is generally guaranteed to converge, it is not guaranteed * to find a global optimum. * - * Note: For high-dimensional data (with many features), this algorithm may perform poorly. - * This is due to high-dimensional data (a) making it difficult to cluster at all (based - * on statistical/theoretical arguments) and (b) numerical issues with Gaussian distributions. + * @note For high-dimensional data (with many features), this algorithm may perform poorly. + * This is due to high-dimensional data (a) making it difficult to cluster at all (based + * on statistical/theoretical arguments) and (b) numerical issues with Gaussian distributions. */ @Since("2.0.0") @Experimental diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 28cbe1cb01e9a..ccfb0ce8f85ca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -85,7 +85,8 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H *
 *    $$ Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min $$
* * For the case $E_{max} == E_{min}$, $Rescaled(e_i) = 0.5 * (max + min)$. - * Note that since zero values will probably be transformed to non-zero values, output of the + * + * @note Since zero values will probably be transformed to non-zero values, output of the * transformer will be DenseVector even for sparse input. */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index e8e28ba29c841..ea401216aec7b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -36,7 +36,8 @@ import org.apache.spark.sql.types.{DoubleType, NumericType, StructType} * The last category is not included by default (configurable via [[OneHotEncoder!.dropLast]] * because it makes the vector entries sum up to one, and hence linearly dependent. * So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. - * Note that this is different from scikit-learn's OneHotEncoder, which keeps all categories. + * + * @note This is different from scikit-learn's OneHotEncoder, which keeps all categories. * The output vectors are sparse. * * @see [[StringIndexer]] for converting categorical values into category indices diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 1e49352b8517e..6e08bf059124c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -142,8 +142,9 @@ class PCAModel private[ml] ( /** * Transform a vector by computed Principal Components. - * NOTE: Vectors to be transformed must be the same length - * as the source vectors given to [[PCA.fit()]]. + * + * @note Vectors to be transformed must be the same length as the source vectors given + * to [[PCA.fit()]]. */ @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 666070037cdd8..0ced21365ff6f 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -28,7 +28,10 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructType} /** * A feature transformer that filters out stop words from input. - * Note: null values from input array are preserved unless adding null to stopWords explicitly. + * + * @note null values from input array are preserved unless adding null to stopWords + * explicitly. + * * @see [[http://en.wikipedia.org/wiki/Stop_words]] */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 80fe46796f807..8b155f00017cf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -113,11 +113,11 @@ object StringIndexer extends DefaultParamsReadable[StringIndexer] { /** * Model fitted by [[StringIndexer]]. * - * NOTE: During transformation, if the input column does not exist, + * @param labels Ordered list of labels, corresponding to indices to be assigned. 
+ * + * @note During transformation, if the input column does not exist, * [[StringIndexerModel.transform]] would return the input dataset unmodified. * This is a temporary fix for the case when target labels do not exist during prediction. - * - * @param labels Ordered list of labels, corresponding to indices to be assigned. */ @Since("1.4.0") class StringIndexerModel ( diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 9245931b27ca6..96206e0b7ad88 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -533,7 +533,7 @@ trait Params extends Identifiable with Serializable { * Returns all params sorted by their names. The default implementation uses Java reflection to * list all public methods that have no arguments and return [[Param]]. * - * Note: Developer should not use this method in constructor because we cannot guarantee that + * @note Developer should not use this method in constructor because we cannot guarantee that * this variable gets initialized before other params. */ lazy val params: Array[Param[_]] = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index ebc6c12ddcf92..1419da874709f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -207,9 +207,9 @@ class DecisionTreeRegressionModel private[ml] ( * where gain is scaled by the number of instances passing through node * - Normalize importances for tree to sum to 1. * - * Note: Feature importance for single decision trees can have high variance due to - * correlated predictor variables. Consider using a [[RandomForestRegressor]] - * to determine feature importance instead. + * @note Feature importance for single decision trees can have high variance due to + * correlated predictor variables. Consider using a [[RandomForestRegressor]] + * to determine feature importance instead. */ @Since("2.0.0") lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(this, numFeatures) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 1d2961e0277f5..736fd3b9e0f64 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -879,8 +879,8 @@ class GeneralizedLinearRegressionSummary private[regression] ( * Private copy of model to ensure Params are not modified outside this class. * Coefficients is not a deep copy, but that is acceptable. * - * NOTE: [[predictionCol]] must be set correctly before the value of [[model]] is set, - * and [[model]] must be set before [[predictions]] is set! + * @note [[predictionCol]] must be set correctly before the value of [[model]] is set, + * and [[model]] must be set before [[predictions]] is set! 
*/ protected val model: GeneralizedLinearRegressionModel = origModel.copy(ParamMap.empty).setPredictionCol(predictionCol) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 71c542adf6f6f..da7ce6b46f2ab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -103,11 +103,13 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String /** * Whether to standardize the training features before fitting the model. * The coefficients of models will be always returned on the original scale, - * so it will be transparent for users. Note that with/without standardization, - * the models should be always converged to the same solution when no regularization - * is applied. In R's GLMNET package, the default behavior is true as well. + * so it will be transparent for users. * Default is true. * + * @note With/without standardization, the models should be always converged + * to the same solution when no regularization is applied. In R's GLMNET package, + * the default behavior is true as well. + * * @group setParam */ @Since("1.5.0") @@ -624,8 +626,8 @@ class LinearRegressionSummary private[regression] ( * explainedVariance = 1 - variance(y - \hat{y}) / variance(y) * Reference: [[http://en.wikipedia.org/wiki/Explained_variation]] * - * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * This will change in later Spark versions. */ @Since("1.5.0") val explainedVariance: Double = metrics.explainedVariance @@ -634,8 +636,8 @@ class LinearRegressionSummary private[regression] ( * Returns the mean absolute error, which is a risk function corresponding to the * expected value of the absolute error loss or l1-norm loss. * - * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * This will change in later Spark versions. */ @Since("1.5.0") val meanAbsoluteError: Double = metrics.meanAbsoluteError @@ -644,8 +646,8 @@ class LinearRegressionSummary private[regression] ( * Returns the mean squared error, which is a risk function corresponding to the * expected value of the squared error loss or quadratic loss. * - * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * This will change in later Spark versions. */ @Since("1.5.0") val meanSquaredError: Double = metrics.meanSquaredError @@ -654,8 +656,8 @@ class LinearRegressionSummary private[regression] ( * Returns the root mean squared error, which is defined as the square root of * the mean squared error. * - * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * This will change in later Spark versions. 
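As a rough illustration of the LinearRegression notes above, a sketch assuming a training DataFrame `training` with "features" and "label" columns:

    import org.apache.spark.ml.regression.LinearRegression

    // With no regularization the fit converges to the same solution with or without
    // standardization, and coefficients are always reported on the original scale.
    val lr = new LinearRegression()
      .setStandardization(true)
      .setRegParam(0.0)

    val model = lr.fit(training)
    // Summary metrics such as RMSE currently ignore instance weights, per the notes above.
    println(model.summary.rootMeanSquaredError)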
*/ @Since("1.5.0") val rootMeanSquaredError: Double = metrics.rootMeanSquaredError @@ -664,8 +666,8 @@ class LinearRegressionSummary private[regression] ( * Returns R^2^, the coefficient of determination. * Reference: [[http://en.wikipedia.org/wiki/Coefficient_of_determination]] * - * Note: This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. - * This will change in later Spark versions. + * @note This ignores instance weights (setting all to 1.0) from [[LinearRegression.weightCol]]. + * This will change in later Spark versions. */ @Since("1.5.0") val r2: Double = metrics.r2 diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala index 73d813064decb..e1376927030e4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMDataSource.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.{DataFrame, DataFrameReader} * inconsistent feature dimensions. * - "vectorType": feature vector type, "sparse" (default) or "dense". * - * Note that this class is public for documentation purpose. Please don't use this class directly. + * @note This class is public for documentation purpose. Please don't use this class directly. * Rather, use the data source API as illustrated above. * * @see [[https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/ LIBSVM datasets]] diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala index ede0a060eef95..0a0bc4c006389 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala @@ -98,7 +98,7 @@ private[spark] object GradientBoostedTrees extends Logging { * @param initTreeWeight: learning rate assigned to the first tree. * @param initTree: first DecisionTreeModel. * @param loss: evaluation metric. - * @return a RDD with each element being a zip of the prediction and error + * @return an RDD with each element being a zip of the prediction and error * corresponding to every sample. */ def computeInitialPredictionAndError( @@ -121,7 +121,7 @@ private[spark] object GradientBoostedTrees extends Logging { * @param treeWeight: Learning rate. * @param tree: Tree using which the prediction and error should be updated. * @param loss: evaluation metric. - * @return a RDD with each element being a zip of the prediction and error + * @return an RDD with each element being a zip of the prediction and error * corresponding to each sample. */ def updatePredictionError( diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala index bc4f9e6716ee8..e5fa5d53e3fca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala @@ -221,7 +221,7 @@ trait MLReadable[T] { /** * Reads an ML instance from the input path, a shortcut of `read.load(path)`. * - * Note: Implementing classes should override this to be Java-friendly. + * @note Implementing classes should override this to be Java-friendly. 
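A short sketch of the data source usage that the LibSVMDataSource note above points to; the path and the `spark` session are placeholders:

    // The class itself exists only for documentation; the format is used via the DataFrameReader.
    val data = spark.read.format("libsvm")
      .option("numFeatures", "780")
      .load("data/mllib/sample_libsvm_data.txt")

    data.select("label", "features").show(5)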
*/ @Since("1.6.0") def load(path: String): T = read.load(path) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index d851b983349c9..4b650000736e2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -202,9 +202,11 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] { * Train a classification model for Binary Logistic Regression * using Stochastic Gradient Descent. By default L2 regularization is used, * which can be changed via `LogisticRegressionWithSGD.optimizer`. - * NOTE: Labels used in Logistic Regression should be {0, 1, ..., k - 1} - * for k classes multi-label classification problem. + * * Using [[LogisticRegressionWithLBFGS]] is recommended over this. + * + * @note Labels used in Logistic Regression should be {0, 1, ..., k - 1} + * for k classes multi-label classification problem. */ @Since("0.8.0") class LogisticRegressionWithSGD private[mllib] ( @@ -239,7 +241,8 @@ class LogisticRegressionWithSGD private[mllib] ( /** * Top-level methods for calling Logistic Regression using Stochastic Gradient Descent. - * NOTE: Labels used in Logistic Regression should be {0, 1} + * + * @note Labels used in Logistic Regression should be {0, 1} */ @Since("0.8.0") @deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0") @@ -252,7 +255,6 @@ object LogisticRegressionWithSGD { * number of iterations of gradient descent using the specified step size. Each iteration uses * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in * gradient descent are initialized using the initial weights provided. - * NOTE: Labels used in Logistic Regression should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param numIterations Number of iterations of gradient descent to run. @@ -260,6 +262,8 @@ object LogisticRegressionWithSGD { * @param miniBatchFraction Fraction of data to be used per iteration. * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. + * + * @note Labels used in Logistic Regression should be {0, 1} */ @Since("1.0.0") def train( @@ -276,13 +280,13 @@ object LogisticRegressionWithSGD { * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed * number of iterations of gradient descent using the specified step size. Each iteration uses * `miniBatchFraction` fraction of the data to calculate the gradient. - * NOTE: Labels used in Logistic Regression should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param numIterations Number of iterations of gradient descent to run. * @param stepSize Step size to be used for each iteration of gradient descent. - * @param miniBatchFraction Fraction of data to be used per iteration. + * + * @note Labels used in Logistic Regression should be {0, 1} */ @Since("1.0.0") def train( @@ -298,13 +302,13 @@ object LogisticRegressionWithSGD { * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed * number of iterations of gradient descent using the specified step size. We use the entire data * set to update the gradient in each iteration. 
- * NOTE: Labels used in Logistic Regression should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param stepSize Step size to be used for each iteration of Gradient Descent. - * @param numIterations Number of iterations of gradient descent to run. * @return a LogisticRegressionModel which has the weights and offset from training. + * + * @note Labels used in Logistic Regression should be {0, 1} */ @Since("1.0.0") def train( @@ -318,11 +322,12 @@ object LogisticRegressionWithSGD { * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed * number of iterations of gradient descent using a step size of 1.0. We use the entire data set * to update the gradient in each iteration. - * NOTE: Labels used in Logistic Regression should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param numIterations Number of iterations of gradient descent to run. * @return a LogisticRegressionModel which has the weights and offset from training. + * + * @note Labels used in Logistic Regression should be {0, 1} */ @Since("1.0.0") def train( @@ -335,8 +340,6 @@ object LogisticRegressionWithSGD { /** * Train a classification model for Multinomial/Binary Logistic Regression using * Limited-memory BFGS. Standard feature scaling and L2 regularization are used by default. - * NOTE: Labels used in Logistic Regression should be {0, 1, ..., k - 1} - * for k classes multi-label classification problem. * * Earlier implementations of LogisticRegressionWithLBFGS applies a regularization * penalty to all elements including the intercept. If this is called with one of @@ -344,6 +347,9 @@ object LogisticRegressionWithSGD { * into a call to ml.LogisticRegression, otherwise this will use the existing mllib * GeneralizedLinearAlgorithm trainer, resulting in a regularization penalty to the * intercept. + * + * @note Labels used in Logistic Regression should be {0, 1, ..., k - 1} + * for k classes multi-label classification problem. */ @Since("1.1.0") class LogisticRegressionWithLBFGS diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 7c3ccbb40b812..aec1526b55c49 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -125,7 +125,8 @@ object SVMModel extends Loader[SVMModel] { /** * Train a Support Vector Machine (SVM) using Stochastic Gradient Descent. By default L2 * regularization is used, which can be changed via [[SVMWithSGD.optimizer]]. - * NOTE: Labels used in SVM should be {0, 1}. + * + * @note Labels used in SVM should be {0, 1}. */ @Since("0.8.0") class SVMWithSGD private ( @@ -158,7 +159,9 @@ class SVMWithSGD private ( } /** - * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}. + * Top-level methods for calling SVM. + * + * @note Labels used in SVM should be {0, 1}. */ @Since("0.8.0") object SVMWithSGD { @@ -169,8 +172,6 @@ object SVMWithSGD { * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in * gradient descent are initialized using the initial weights provided. * - * NOTE: Labels used in SVM should be {0, 1}. - * * @param input RDD of (label, array of features) pairs. * @param numIterations Number of iterations of gradient descent to run. * @param stepSize Step size to be used for each iteration of gradient descent. 
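A minimal sketch of the label convention called out in the logistic regression notes above, assuming a SparkContext `sc`; the data is illustrative:

    import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.LabeledPoint

    // Labels must be 0, 1, ..., k - 1; here k = 2.
    val training = sc.parallelize(Seq(
      LabeledPoint(0.0, Vectors.dense(0.0, 1.1)),
      LabeledPoint(1.0, Vectors.dense(2.0, 1.0)),
      LabeledPoint(0.0, Vectors.dense(0.2, 1.3))))

    // LBFGS is the recommended trainer over LogisticRegressionWithSGD.
    val model = new LogisticRegressionWithLBFGS()
      .setNumClasses(2)
      .run(training)

    println(model.predict(Vectors.dense(1.5, 1.0)))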
@@ -178,6 +179,8 @@ object SVMWithSGD { * @param miniBatchFraction Fraction of data to be used per iteration. * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. + * + * @note Labels used in SVM should be {0, 1}. */ @Since("0.8.0") def train( @@ -195,7 +198,8 @@ object SVMWithSGD { * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number * of iterations of gradient descent using the specified step size. Each iteration uses * `miniBatchFraction` fraction of the data to calculate the gradient. - * NOTE: Labels used in SVM should be {0, 1} + * + * @note Labels used in SVM should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param numIterations Number of iterations of gradient descent to run. @@ -217,13 +221,14 @@ object SVMWithSGD { * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number * of iterations of gradient descent using the specified step size. We use the entire data set to * update the gradient in each iteration. - * NOTE: Labels used in SVM should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param stepSize Step size to be used for each iteration of Gradient Descent. * @param regParam Regularization parameter. * @param numIterations Number of iterations of gradient descent to run. * @return a SVMModel which has the weights and offset from training. + * + * @note Labels used in SVM should be {0, 1} */ @Since("0.8.0") def train( @@ -238,11 +243,12 @@ object SVMWithSGD { * Train a SVM model given an RDD of (label, features) pairs. We run a fixed number * of iterations of gradient descent using a step size of 1.0. We use the entire data set to * update the gradient in each iteration. - * NOTE: Labels used in SVM should be {0, 1} * * @param input RDD of (label, array of features) pairs. * @param numIterations Number of iterations of gradient descent to run. * @return a SVMModel which has the weights and offset from training. + * + * @note Labels used in SVM should be {0, 1} */ @Since("0.8.0") def train(input: RDD[LabeledPoint], numIterations: Int): SVMModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index 43193adf3e184..56cdeea5f7a3f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -41,14 +41,14 @@ import org.apache.spark.util.Utils * While this process is generally guaranteed to converge, it is not guaranteed * to find a global optimum. * - * Note: For high-dimensional data (with many features), this algorithm may perform poorly. - * This is due to high-dimensional data (a) making it difficult to cluster at all (based - * on statistical/theoretical arguments) and (b) numerical issues with Gaussian distributions. - * * @param k Number of independent Gaussians in the mixture model. * @param convergenceTol Maximum change in log-likelihood at which convergence * is considered to have occurred. * @param maxIterations Maximum number of iterations allowed. + * + * @note For high-dimensional data (with many features), this algorithm may perform poorly. + * This is due to high-dimensional data (a) making it difficult to cluster at all (based + * on statistical/theoretical arguments) and (b) numerical issues with Gaussian distributions. 
*/ @Since("1.3.0") class GaussianMixture private ( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index ed9c064879d01..fa72b72e2d921 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -56,14 +56,18 @@ class KMeans private ( def this() = this(2, 20, KMeans.K_MEANS_PARALLEL, 2, 1e-4, Utils.random.nextLong()) /** - * Number of clusters to create (k). Note that it is possible for fewer than k clusters to + * Number of clusters to create (k). + * + * @note It is possible for fewer than k clusters to * be returned, for example, if there are fewer than k distinct points to cluster. */ @Since("1.4.0") def getK: Int = k /** - * Set the number of clusters to create (k). Note that it is possible for fewer than k clusters to + * Set the number of clusters to create (k). + * + * @note It is possible for fewer than k clusters to * be returned, for example, if there are fewer than k distinct points to cluster. Default: 2. */ @Since("0.8.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala index d999b9be8e8ac..7c52abdeaac22 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala @@ -175,7 +175,7 @@ class LDA private ( * * This is the parameter to a symmetric Dirichlet distribution. * - * Note: The topics' distributions over terms are called "beta" in the original LDA paper + * @note The topics' distributions over terms are called "beta" in the original LDA paper * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009. */ @Since("1.3.0") @@ -187,7 +187,7 @@ class LDA private ( * * This is the parameter to a symmetric Dirichlet distribution. * - * Note: The topics' distributions over terms are called "beta" in the original LDA paper + * @note The topics' distributions over terms are called "beta" in the original LDA paper * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009. * * If set to -1, then topicConcentration is set automatically. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index 90d8a558f10d4..b5b0e64a2a6c6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -66,7 +66,7 @@ abstract class LDAModel private[clustering] extends Saveable { * * This is the parameter to a symmetric Dirichlet distribution. * - * Note: The topics' distributions over terms are called "beta" in the original LDA paper + * @note The topics' distributions over terms are called "beta" in the original LDA paper * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009. 
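To ground the KMeans note above about possibly getting fewer than k clusters, a small sketch assuming a SparkContext `sc`:

    import org.apache.spark.mllib.clustering.KMeans
    import org.apache.spark.mllib.linalg.Vectors

    // Only two distinct points, so asking for k = 3 may legitimately yield fewer centers.
    val points = sc.parallelize(Seq(
      Vectors.dense(0.0, 0.0), Vectors.dense(0.0, 0.0),
      Vectors.dense(9.0, 9.0)))

    val model = new KMeans()
      .setK(3)
      .setMaxIterations(20)
      .run(points)

    println(model.clusterCenters.length)  // may be less than 3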
*/ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index ae324f86fe6d1..7365ea1f200da 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -93,9 +93,11 @@ final class EMLDAOptimizer extends LDAOptimizer { /** * If using checkpointing, this indicates whether to keep the last checkpoint (vs clean up). * Deleting the checkpoint can cause failures if a data partition is lost, so set this bit with - * care. Note that checkpoints will be cleaned up via reference counting, regardless. + * care. * * Default: true + * + * @note Checkpoints will be cleaned up via reference counting, regardless. */ @Since("2.0.0") def setKeepLastCheckpoint(keepLastCheckpoint: Boolean): this.type = { @@ -348,7 +350,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer { * Mini-batch fraction in (0, 1], which sets the fraction of document sampled and used in * each iteration. * - * Note that this should be adjusted in synch with [[LDA.setMaxIterations()]] + * @note This should be adjusted in synch with [[LDA.setMaxIterations()]] * so the entire corpus is used. Specifically, set both so that * maxIterations * miniBatchFraction >= 1. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala index f0779491e6374..003d1411a9cf7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala @@ -39,7 +39,7 @@ private[evaluation] object AreaUnderCurve { /** * Returns the area under the given curve. * - * @param curve a RDD of ordered 2D points stored in pairs representing a curve + * @param curve an RDD of ordered 2D points stored in pairs representing a curve */ def of(curve: RDD[(Double, Double)]): Double = { curve.sliding(2).aggregate(0.0)( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index fbd217af74ecb..c94d7890cf557 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.types._ /** * Represents a numeric vector, whose index type is Int and value type is Double. * - * Note: Users should not implement this interface. + * @note Users should not implement this interface. */ @SQLUserDefinedType(udt = classOf[VectorUDT]) @Since("1.0.0") @@ -132,7 +132,9 @@ sealed trait Vector extends Serializable { /** * Number of active entries. An "active entry" is an element which is explicitly stored, - * regardless of its value. Note that inactive entries have value 0. + * regardless of its value. + * + * @note Inactive entries have value 0. 
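A sketch of the miniBatchFraction / maxIterations relationship mentioned in the OnlineLDAOptimizer note above; `corpus` is an assumed RDD[(Long, Vector)] of (document id, term counts):

    import org.apache.spark.mllib.clustering.{LDA, OnlineLDAOptimizer}

    // 50 iterations * 0.05 sampled per iteration = 2.5 >= 1, so the whole corpus is visited.
    val lda = new LDA()
      .setK(10)
      .setMaxIterations(50)
      .setOptimizer(new OnlineLDAOptimizer().setMiniBatchFraction(0.05))

    val ldaModel = lda.run(corpus)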
*/ @Since("1.4.0") def numActives: Int diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 377be6bfb9886..03866753b50ee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -451,7 +451,7 @@ class BlockMatrix @Since("1.3.0") ( * [[BlockMatrix]] will only consist of blocks of [[DenseMatrix]]. This may cause * some performance issues until support for multiplying two sparse matrices is added. * - * Note: The behavior of multiply has changed in 1.6.0. `multiply` used to throw an error when + * @note The behavior of multiply has changed in 1.6.0. `multiply` used to throw an error when * there were blocks with duplicate indices. Now, the blocks with duplicate indices will be added * with each other. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index b03b3ecde94f4..809906a158337 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -188,8 +188,9 @@ class IndexedRowMatrix @Since("1.0.0") ( } /** - * Computes the Gramian matrix `A^T A`. Note that this cannot be - * computed on matrices with more than 65535 columns. + * Computes the Gramian matrix `A^T A`. + * + * @note This cannot be computed on matrices with more than 65535 columns. */ @Since("1.0.0") def computeGramianMatrix(): Matrix = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index ec32e37afb792..4b120332ab8d8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -106,8 +106,9 @@ class RowMatrix @Since("1.0.0") ( } /** - * Computes the Gramian matrix `A^T A`. Note that this cannot be computed on matrices with - * more than 65535 columns. + * Computes the Gramian matrix `A^T A`. + * + * @note This cannot be computed on matrices with more than 65535 columns. */ @Since("1.0.0") def computeGramianMatrix(): Matrix = { @@ -168,9 +169,6 @@ class RowMatrix @Since("1.0.0") ( * ARPACK is set to 300 or k * 3, whichever is larger. The numerical tolerance for ARPACK's * eigen-decomposition is set to 1e-10. * - * @note The conditions that decide which method to use internally and the default parameters are - * subject to change. - * * @param k number of leading singular values to keep (0 < k <= n). * It might return less than k if * there are numerically zero singular values or there are not enough Ritz values @@ -180,6 +178,9 @@ class RowMatrix @Since("1.0.0") ( * @param rCond the reciprocal condition number. All singular values smaller than rCond * sigma(0) * are treated as zero, where sigma(0) is the largest singular value. * @return SingularValueDecomposition(U, s, V). U = null if computeU = false. + * + * @note The conditions that decide which method to use internally and the default parameters are + * subject to change. 
*/ @Since("1.0.0") def computeSVD( @@ -319,9 +320,11 @@ class RowMatrix @Since("1.0.0") ( } /** - * Computes the covariance matrix, treating each row as an observation. Note that this cannot - * be computed on matrices with more than 65535 columns. + * Computes the covariance matrix, treating each row as an observation. + * * @return a local dense matrix of size n x n + * + * @note This cannot be computed on matrices with more than 65535 columns. */ @Since("1.0.0") def computeCovariance(): Matrix = { @@ -369,12 +372,12 @@ class RowMatrix @Since("1.0.0") ( * The row data do not need to be "centered" first; it is not necessary for * the mean of each column to be 0. * - * Note that this cannot be computed on matrices with more than 65535 columns. - * * @param k number of top principal components. * @return a matrix of size n-by-k, whose columns are principal components, and * a vector of values which indicate how much variance each principal component * explains + * + * @note This cannot be computed on matrices with more than 65535 columns. */ @Since("1.6.0") def computePrincipalComponentsAndExplainedVariance(k: Int): (Matrix, Vector) = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala index 81e64de4e5b5d..c49e72646bf13 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -305,7 +305,8 @@ class LeastSquaresGradient extends Gradient { * :: DeveloperApi :: * Compute gradient and loss for a Hinge loss function, as used in SVM binary classification. * See also the documentation for the precise formulation. - * NOTE: This assumes that the labels are {0,1} + * + * @note This assumes that the labels are {0,1} */ @DeveloperApi class HingeGradient extends Gradient { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala index 0f7857b8d8627..005119616f063 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala @@ -31,7 +31,7 @@ import org.apache.spark.rdd.RDD class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable { /** - * Returns a RDD from grouping items of its parent RDD in fixed size blocks by passing a sliding + * Returns an RDD from grouping items of its parent RDD in fixed size blocks by passing a sliding * window over them. The ordering is first based on the partition index and then the ordering of * items within each partition. This is similar to sliding in Scala collections, except that it * becomes an empty RDD if the window size is greater than the total number of items. It needs to diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index c642573ccba6d..24e4dcccc843f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -43,14 +43,14 @@ import org.apache.spark.storage.StorageLevel /** * Model representing the result of matrix factorization. 
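A small sketch of the RowMatrix routines whose 65535-column limits are noted above, assuming a SparkContext `sc`; the matrix is toy-sized:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.linalg.distributed.RowMatrix

    val rows = sc.parallelize(Seq(
      Vectors.dense(1.0, 2.0, 3.0),
      Vectors.dense(4.0, 5.0, 6.0),
      Vectors.dense(7.0, 8.0, 10.0)))
    val mat = new RowMatrix(rows)

    // Each of these materializes an n x n local matrix, hence the column-count limits above.
    val cov = mat.computeCovariance()
    val svd = mat.computeSVD(2, computeU = true)
    val (pc, explainedVariance) = mat.computePrincipalComponentsAndExplainedVariance(2)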
* - * Note: If you create the model directly using constructor, please be aware that fast prediction - * requires cached user/product features and their associated partitioners. - * * @param rank Rank for the features in this model. * @param userFeatures RDD of tuples where each tuple represents the userId and * the features computed for this user. * @param productFeatures RDD of tuples where each tuple represents the productId * and the features computed for this product. + * + * @note If you create the model directly using constructor, please be aware that fast prediction + * requires cached user/product features and their associated partitioners. */ @Since("0.8.0") class MatrixFactorizationModel @Since("0.8.0") ( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala index f3159f7e724cc..925fdf4d7e7bc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala @@ -60,15 +60,15 @@ object Statistics { * Compute the correlation matrix for the input RDD of Vectors using the specified method. * Methods currently supported: `pearson` (default), `spearman`. * - * Note that for Spearman, a rank correlation, we need to create an RDD[Double] for each column - * and sort it in order to retrieve the ranks and then join the columns back into an RDD[Vector], - * which is fairly costly. Cache the input RDD before calling corr with `method = "spearman"` to - * avoid recomputing the common lineage. - * * @param X an RDD[Vector] for which the correlation matrix is to be computed. * @param method String specifying the method to use for computing correlation. * Supported: `pearson` (default), `spearman` * @return Correlation matrix comparing columns in X. + * + * @note For Spearman, a rank correlation, we need to create an RDD[Double] for each column + * and sort it in order to retrieve the ranks and then join the columns back into an RDD[Vector], + * which is fairly costly. Cache the input RDD before calling corr with `method = "spearman"` to + * avoid recomputing the common lineage. */ @Since("1.1.0") def corr(X: RDD[Vector], method: String): Matrix = Correlations.corrMatrix(X, method) @@ -77,12 +77,12 @@ object Statistics { * Compute the Pearson correlation for the input RDDs. * Returns NaN if either vector has 0 variance. * - * Note: the two input RDDs need to have the same number of partitions and the same number of - * elements in each partition. - * * @param x RDD[Double] of the same cardinality as y. * @param y RDD[Double] of the same cardinality as x. * @return A Double containing the Pearson correlation between the two input RDD[Double]s + * + * @note The two input RDDs need to have the same number of partitions and the same number of + * elements in each partition. */ @Since("1.1.0") def corr(x: RDD[Double], y: RDD[Double]): Double = Correlations.corr(x, y) @@ -98,15 +98,15 @@ object Statistics { * Compute the correlation for the input RDDs using the specified method. * Methods currently supported: `pearson` (default), `spearman`. * - * Note: the two input RDDs need to have the same number of partitions and the same number of - * elements in each partition. - * * @param x RDD[Double] of the same cardinality as y. * @param y RDD[Double] of the same cardinality as x. * @param method String specifying the method to use for computing correlation. 
* Supported: `pearson` (default), `spearman` * @return A Double containing the correlation between the two input RDD[Double]s using the * specified method. + * + * @note The two input RDDs need to have the same number of partitions and the same number of + * elements in each partition. */ @Since("1.1.0") def corr(x: RDD[Double], y: RDD[Double], method: String): Double = Correlations.corr(x, y, method) @@ -122,15 +122,15 @@ object Statistics { * Conduct Pearson's chi-squared goodness of fit test of the observed data against the * expected distribution. * - * Note: the two input Vectors need to have the same size. - * `observed` cannot contain negative values. - * `expected` cannot contain nonpositive values. - * * @param observed Vector containing the observed categorical counts/relative frequencies. * @param expected Vector containing the expected categorical counts/relative frequencies. * `expected` is rescaled if the `expected` sum differs from the `observed` sum. * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. + * + * @note The two input Vectors need to have the same size. + * `observed` cannot contain negative values. + * `expected` cannot contain nonpositive values. */ @Since("1.1.0") def chiSqTest(observed: Vector, expected: Vector): ChiSqTestResult = { @@ -141,11 +141,11 @@ object Statistics { * Conduct Pearson's chi-squared goodness of fit test of the observed data against the uniform * distribution, with each category having an expected frequency of `1 / observed.size`. * - * Note: `observed` cannot contain negative values. - * * @param observed Vector containing the observed categorical counts/relative frequencies. * @return ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, * the method used, and the null hypothesis. + * + * @note `observed` cannot contain negative values. */ @Since("1.1.0") def chiSqTest(observed: Vector): ChiSqTestResult = ChiSqTest.chiSquared(observed) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index 36feab7859b43..d846c43cf2913 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -75,10 +75,6 @@ object DecisionTree extends Serializable with Logging { * Method to train a decision tree model. * The method supports binary and multiclass classification and regression. * - * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] - * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] - * is recommended to clearly separate classification and regression. - * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * For classification, labels should take values {0, 1, ..., numClasses-1}. * For regression, labels are real numbers. @@ -86,6 +82,10 @@ object DecisionTree extends Serializable with Logging { * of decision tree (classification or regression), feature type (continuous, * categorical), depth of the tree, quantile calculation strategy, etc. * @return DecisionTreeModel that can be used for prediction. + * + * @note Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] + * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] + * is recommended to clearly separate classification and regression. 
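A sketch of the Statistics calls covered above, assuming a SparkContext `sc`; the inputs are illustrative and satisfy the constraints in the notes:

    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.stat.Statistics

    // Same number of partitions and elements per partition, as the correlation note requires.
    val x = sc.parallelize(Seq(1.0, 2.0, 3.0, 5.0), numSlices = 2)
    val y = sc.parallelize(Seq(2.0, 4.0, 6.0, 9.0), numSlices = 2)
    println(Statistics.corr(x, y, "pearson"))

    // Observed counts are non-negative, as the chi-squared note requires.
    val observed = Vectors.dense(4.0, 6.0, 5.0)
    println(Statistics.chiSqTest(observed).pValue)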
*/ @Since("1.0.0") def train(input: RDD[LabeledPoint], strategy: Strategy): DecisionTreeModel = { @@ -96,10 +96,6 @@ object DecisionTree extends Serializable with Logging { * Method to train a decision tree model. * The method supports binary and multiclass classification and regression. * - * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] - * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] - * is recommended to clearly separate classification and regression. - * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * For classification, labels should take values {0, 1, ..., numClasses-1}. * For regression, labels are real numbers. @@ -108,6 +104,10 @@ object DecisionTree extends Serializable with Logging { * @param maxDepth Maximum depth of the tree (e.g. depth 0 means 1 leaf node, depth 1 means * 1 internal node + 2 leaf nodes). * @return DecisionTreeModel that can be used for prediction. + * + * @note Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] + * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] + * is recommended to clearly separate classification and regression. */ @Since("1.0.0") def train( @@ -123,10 +123,6 @@ object DecisionTree extends Serializable with Logging { * Method to train a decision tree model. * The method supports binary and multiclass classification and regression. * - * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] - * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] - * is recommended to clearly separate classification and regression. - * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * For classification, labels should take values {0, 1, ..., numClasses-1}. * For regression, labels are real numbers. @@ -136,6 +132,10 @@ object DecisionTree extends Serializable with Logging { * 1 internal node + 2 leaf nodes). * @param numClasses Number of classes for classification. Default value of 2. * @return DecisionTreeModel that can be used for prediction. + * + * @note Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] + * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] + * is recommended to clearly separate classification and regression. */ @Since("1.2.0") def train( @@ -152,10 +152,6 @@ object DecisionTree extends Serializable with Logging { * Method to train a decision tree model. * The method supports binary and multiclass classification and regression. * - * Note: Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] - * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] - * is recommended to clearly separate classification and regression. - * * @param input Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * For classification, labels should take values {0, 1, ..., numClasses-1}. * For regression, labels are real numbers. @@ -170,6 +166,10 @@ object DecisionTree extends Serializable with Logging { * indicates that feature n is categorical with k categories * indexed from 0: {0, 1, ..., k-1}. * @return DecisionTreeModel that can be used for prediction. + * + * @note Using [[org.apache.spark.mllib.tree.DecisionTree$#trainClassifier]] + * and [[org.apache.spark.mllib.tree.DecisionTree$#trainRegressor]] + * is recommended to clearly separate classification and regression. 
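Since the notes above steer callers toward trainClassifier / trainRegressor, a minimal sketch of that entry point; `training` is an assumed RDD[LabeledPoint] with labels in {0, 1}:

    import org.apache.spark.mllib.tree.DecisionTree

    // trainClassifier makes the task explicit instead of relying on the generic train() overloads.
    val model = DecisionTree.trainClassifier(
      training,
      numClasses = 2,
      categoricalFeaturesInfo = Map.empty[Int, Int],
      impurity = "gini",
      maxDepth = 5,
      maxBins = 32)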
*/ @Since("1.0.0") def train( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala index de14ddf024d75..09274a2e1b2ac 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala @@ -42,11 +42,13 @@ trait Loss extends Serializable { /** * Method to calculate error of the base learner for the gradient boosting calculation. - * Note: This method is not used by the gradient boosting algorithm but is useful for debugging - * purposes. + * * @param model Model of the weak learner. * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]]. * @return Measure of model error on data + * + * @note This method is not used by the gradient boosting algorithm but is useful for debugging + * purposes. */ @Since("1.2.0") def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double = { @@ -55,11 +57,13 @@ trait Loss extends Serializable { /** * Method to calculate loss when the predictions are already known. - * Note: This method is used in the method evaluateEachIteration to avoid recomputing the - * predicted values from previously fit trees. + * * @param prediction Predicted label. * @param label True label. * @return Measure of model error on datapoint. + * + * @note This method is used in the method evaluateEachIteration to avoid recomputing the + * predicted values from previously fit trees. */ private[spark] def computeError(prediction: Double, label: Double): Double } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 657ed0a8ecda8..299950785e420 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -187,7 +187,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param initTreeWeight: learning rate assigned to the first tree. * @param initTree: first DecisionTreeModel. * @param loss: evaluation metric. - * @return a RDD with each element being a zip of the prediction and error + * @return an RDD with each element being a zip of the prediction and error * corresponding to every sample. */ @Since("1.4.0") @@ -213,7 +213,7 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { * @param treeWeight: Learning rate. * @param tree: Tree using which the prediction and error should be updated. * @param loss: evaluation metric. - * @return a RDD with each element being a zip of the prediction and error + * @return an RDD with each element being a zip of the prediction and error * corresponding to each sample. 
*/ @Since("1.4.0") diff --git a/pom.xml b/pom.xml index 650b4cd965b66..024b2850d0a3d 100644 --- a/pom.xml +++ b/pom.xml @@ -2476,6 +2476,13 @@ maven-javadoc-plugin -Xdoclint:all -Xdoclint:-missing + + + note + a + Note: + + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 2d3a95b163a76..92b45657210e1 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -741,7 +741,8 @@ object Unidoc { javacOptions in (JavaUnidoc, unidoc) := Seq( "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", "-public", - "-noqualifier", "java.lang" + "-noqualifier", "java.lang", + "-tag", """note:a:Note\:""" ), // Use GitHub repository for Scaladoc source links diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 3b1c5519bd87e..7250eab6705a7 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -28,7 +28,7 @@ class KernelDensity(object): """ - Estimate probability density at required points given a RDD of samples + Estimate probability density at required points given an RDD of samples from the population. >>> kd = KernelDensity() diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index ed6fd4bca4c54..97755807ef262 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -499,7 +499,7 @@ def generateLinearInput(intercept, weights, xMean, xVariance, def generateLinearRDD(sc, nexamples, nfeatures, eps, nParts=2, intercept=0.0): """ - Generate a RDD of LabeledPoints. + Generate an RDD of LabeledPoints. """ return callMLlibFunc( "generateLinearRDDWrapper", sc, int(nexamples), int(nfeatures), diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index a163ceafe9d3b..641787ee20e0c 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1218,7 +1218,7 @@ def mergeMaps(m1, m2): def top(self, num, key=None): """ - Get the top N elements from a RDD. + Get the top N elements from an RDD. Note that this method should only be used if the resulting array is expected to be small, as all the data is loaded into the driver's memory. @@ -1242,7 +1242,7 @@ def merge(a, b): def takeOrdered(self, num, key=None): """ - Get the N elements from a RDD ordered in ascending order or as + Get the N elements from an RDD ordered in ascending order or as specified by the optional key function. Note that this method should only be used if the resulting array is expected diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py index bf27d8047a753..134424add3b62 100644 --- a/python/pyspark/streaming/kafka.py +++ b/python/pyspark/streaming/kafka.py @@ -144,7 +144,7 @@ def createRDD(sc, kafkaParams, offsetRanges, leaders=None, """ .. note:: Experimental - Create a RDD from Kafka using offset ranges for each topic and partition. + Create an RDD from Kafka using offset ranges for each topic and partition. :param sc: SparkContext object :param kafkaParams: Additional params for Kafka @@ -155,7 +155,7 @@ def createRDD(sc, kafkaParams, offsetRanges, leaders=None, :param valueDecoder: A function used to decode value (default is utf8_decoder) :param messageHandler: A function used to convert KafkaMessageAndMetadata. You can assess meta using messageHandler (default is None). 
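The build changes above register a custom `note` tag so that javadoc accepts `@note` and renders it under a "Note:" heading, matching what scaladoc already does. A small illustrative example of the resulting doc style; the method itself is made up:

    /**
     * Doubles the input.
     *
     * @note With the `note` tag registered in the build, this renders under a "Note:"
     * heading in the generated javadoc as well as in scaladoc.
     */
    def double(x: Int): Int = 2 * x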
- :return: A RDD object + :return: An RDD object """ if leaders is None: leaders = dict() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index dc90659a676e0..0b95a8821b05a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -165,10 +165,10 @@ object Encoders { * (Scala-specific) Creates an encoder that serializes objects of type T using generic Java * serialization. This encoder maps T into a single byte array (binary) field. * - * Note that this is extremely inefficient and should only be used as the last resort. - * * T must be publicly accessible. * + * @note This is extremely inefficient and should only be used as the last resort. + * * @since 1.6.0 */ def javaSerialization[T: ClassTag]: Encoder[T] = genericSerializer(useKryo = false) @@ -177,10 +177,10 @@ object Encoders { * Creates an encoder that serializes objects of type T using generic Java serialization. * This encoder maps T into a single byte array (binary) field. * - * Note that this is extremely inefficient and should only be used as the last resort. - * * T must be publicly accessible. * + * @note This is extremely inefficient and should only be used as the last resort. + * * @since 1.6.0 */ def javaSerialization[T](clazz: Class[T]): Encoder[T] = javaSerialization(ClassTag[T](clazz)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala index e121044288e5a..21f3497ba06fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CalendarIntervalType.scala @@ -23,10 +23,10 @@ import org.apache.spark.annotation.InterfaceStability * The data type representing calendar time intervals. The calendar time interval is stored * internally in two components: number of months the number of microseconds. * - * Note that calendar intervals are not comparable. - * * Please use the singleton [[DataTypes.CalendarIntervalType]]. * + * @note Calendar intervals are not comparable. + * * @since 1.5.0 */ @InterfaceStability.Stable diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 7a131b30eafd7..fa3b2b9de5d5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -118,7 +118,7 @@ class TypedColumn[-T, U]( * $"a" === $"b" * }}} * - * Note that the internal Catalyst expression can be accessed via "expr", but this method is for + * @note The internal Catalyst expression can be accessed via "expr", but this method is for * debugging purposes only and can change in any future Spark releases. 
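A sketch of the serialization encoders whose efficiency caveat is noted above; the Payload class is illustrative:

    import org.apache.spark.sql.{Encoder, Encoders}

    // Java serialization is a last resort; Kryo is usually the better generic fallback,
    // and case classes already get efficient built-in encoders without either.
    case class Payload(body: String)

    val javaEnc: Encoder[Payload] = Encoders.javaSerialization[Payload]
    val kryoEnc: Encoder[Payload] = Encoders.kryo[Payload]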
* * @groupname java_expr_ops Java-specific expression operators diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index b5bbcee37150f..6335fc4579a28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -51,7 +51,6 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient * Online Computation of Quantile Summaries]] by Greenwald and Khanna. * - * Note that NaN values will be removed from the numerical column before calculation * @param col the name of the numerical column * @param probabilities a list of quantile probabilities * Each number must belong to [0, 1]. @@ -61,6 +60,8 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * Note that values greater than 1 are accepted but give the same result as 1. * @return the approximate quantiles at the given probabilities * + * @note NaN values will be removed from the numerical column before calculation + * * @since 2.0.0 */ def approxQuantile( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index e0c89811ddbfa..15281f24fa628 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -218,7 +218,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * Inserts the content of the [[DataFrame]] to the specified table. It requires that * the schema of the [[DataFrame]] is the same as the schema of the table. * - * Note: Unlike `saveAsTable`, `insertInto` ignores the column names and just uses position-based + * @note Unlike `saveAsTable`, `insertInto` ignores the column names and just uses position-based * resolution. For example: * * {{{ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 3761773698df3..3c75a6a45ec86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -377,7 +377,7 @@ class Dataset[T] private[sql]( /** * Converts this strongly typed collection of data to generic `DataFrame` with columns renamed. - * This can be quite convenient in conversion from a RDD of tuples into a [[DataFrame]] with + * This can be quite convenient in conversion from an RDD of tuples into a [[DataFrame]] with * meaningful names. For example: * {{{ * val rdd: RDD[(Int, String)] = ... @@ -703,13 +703,13 @@ class Dataset[T] private[sql]( * df1.join(df2, "user_id") * }}} * - * Note that if you perform a self-join using this function without aliasing the input - * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since - * there is no way to disambiguate which side of the join you would like to reference. - * * @param right Right side of the join operation. * @param usingColumn Name of the column to join on. This column must exist on both sides. * + * @note If you perform a self-join using this function without aliasing the input + * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since + * there is no way to disambiguate which side of the join you would like to reference. 
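A sketch of approxQuantile as documented above; a DataFrame `df` with a numeric column "value" is assumed:

    // NaN values in "value" are dropped before the quantiles are computed, per the note above.
    // relativeError = 0.0 gives exact quantiles at higher cost; values near 1 are cheap but coarse.
    val Array(q1, median, q3) = df.stat.approxQuantile("value", Array(0.25, 0.5, 0.75), 0.01)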
+ * * @group untypedrel * @since 2.0.0 */ @@ -728,13 +728,13 @@ class Dataset[T] private[sql]( * df1.join(df2, Seq("user_id", "user_name")) * }}} * - * Note that if you perform a self-join using this function without aliasing the input - * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since - * there is no way to disambiguate which side of the join you would like to reference. - * * @param right Right side of the join operation. * @param usingColumns Names of the columns to join on. This columns must exist on both sides. * + * @note If you perform a self-join using this function without aliasing the input + * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since + * there is no way to disambiguate which side of the join you would like to reference. + * * @group untypedrel * @since 2.0.0 */ @@ -748,14 +748,14 @@ class Dataset[T] private[sql]( * Different from other join functions, the join columns will only appear once in the output, * i.e. similar to SQL's `JOIN USING` syntax. * - * Note that if you perform a self-join using this function without aliasing the input - * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since - * there is no way to disambiguate which side of the join you would like to reference. - * * @param right Right side of the join operation. * @param usingColumns Names of the columns to join on. This columns must exist on both sides. * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. * + * @note If you perform a self-join using this function without aliasing the input + * [[DataFrame]]s, you will NOT be able to reference any columns after the join, since + * there is no way to disambiguate which side of the join you would like to reference. + * * @group untypedrel * @since 2.0.0 */ @@ -856,10 +856,10 @@ class Dataset[T] private[sql]( /** * Explicit cartesian join with another [[DataFrame]]. * - * Note that cartesian joins are very expensive without an extra filter that can be pushed down. - * * @param right Right side of the join operation. * + * @note Cartesian joins are very expensive without an extra filter that can be pushed down. + * * @group untypedrel * @since 2.1.0 */ @@ -1044,7 +1044,8 @@ class Dataset[T] private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. - * Note that the column name can also reference to a nested column like `a.b`. + * + * @note The column name can also reference to a nested column like `a.b`. * * @group untypedrel * @since 2.0.0 @@ -1053,7 +1054,8 @@ class Dataset[T] private[sql]( /** * Selects column based on the column name and return it as a [[Column]]. - * Note that the column name can also reference to a nested column like `a.b`. + * + * @note The column name can also reference to a nested column like `a.b`. * * @group untypedrel * @since 2.0.0 @@ -1621,7 +1623,7 @@ class Dataset[T] private[sql]( * Returns a new Dataset containing rows only in both this Dataset and another Dataset. * This is equivalent to `INTERSECT` in SQL. * - * Note that, equality checking is performed directly on the encoded representation of the data + * @note Equality checking is performed directly on the encoded representation of the data * and thus is not affected by a custom `equals` function defined on `T`. * * @group typedrel @@ -1635,7 +1637,7 @@ class Dataset[T] private[sql]( * Returns a new Dataset containing rows in this Dataset but not in another Dataset. 
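To make the self-join caveat above concrete, a sketch that aliases both sides before joining; the DataFrame `people` and its columns are illustrative:

    import org.apache.spark.sql.functions.col

    // Without the aliases, columns of the joined result could not be referenced unambiguously.
    val left = people.as("l")
    val right = people.as("r")

    val pairs = left.join(right, col("l.manager_id") === col("r.id"), "inner")
      .select(col("l.name").as("employee"), col("r.name").as("manager"))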
* This is equivalent to `EXCEPT` in SQL. * - * Note that, equality checking is performed directly on the encoded representation of the data + * @note Equality checking is performed directly on the encoded representation of the data * and thus is not affected by a custom `equals` function defined on `T`. * * @group typedrel @@ -1648,13 +1650,13 @@ class Dataset[T] private[sql]( /** * Returns a new [[Dataset]] by sampling a fraction of rows, using a user-supplied seed. * - * Note: this is NOT guaranteed to provide exactly the fraction of the count - * of the given [[Dataset]]. - * * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. * @param seed Seed for sampling. * + * @note This is NOT guaranteed to provide exactly the fraction of the count + * of the given [[Dataset]]. + * * @group typedrel * @since 1.6.0 */ @@ -1670,12 +1672,12 @@ class Dataset[T] private[sql]( /** * Returns a new [[Dataset]] by sampling a fraction of rows, using a random seed. * - * Note: this is NOT guaranteed to provide exactly the fraction of the total count - * of the given [[Dataset]]. - * * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. * + * @note This is NOT guaranteed to provide exactly the fraction of the total count + * of the given [[Dataset]]. + * * @group typedrel * @since 1.6.0 */ @@ -2375,7 +2377,7 @@ class Dataset[T] private[sql]( * * The iterator will consume as much memory as the largest partition in this Dataset. * - * Note: this results in multiple Spark jobs, and if the input Dataset is the result + * @note this results in multiple Spark jobs, and if the input Dataset is the result * of a wide transformation (e.g. join with different partitioners), to avoid * recomputing the input Dataset should be cached first. * @@ -2453,7 +2455,7 @@ class Dataset[T] private[sql]( * Returns a new Dataset that contains only the unique rows from this Dataset. * This is an alias for `dropDuplicates`. * - * Note that, equality checking is performed directly on the encoded representation of the data + * @note Equality checking is performed directly on the encoded representation of the data * and thus is not affected by a custom `equals` function defined on `T`. * * @group typedrel diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 3c5cf037c578d..2fae93651b344 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -181,9 +181,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * A collection of methods for registering user-defined functions (UDF). - * Note that the user-defined functions must be deterministic. Due to optimization, - * duplicate invocations may be eliminated or the function may even be invoked more times than - * it is present in the query. * * The following example registers a Scala closure as UDF: * {{{ @@ -208,6 +205,10 @@ class SQLContext private[sql](val sparkSession: SparkSession) * DataTypes.StringType); * }}} * + * @note The user-defined functions must be deterministic. Due to optimization, + * duplicate invocations may be eliminated or the function may even be invoked more times than + * it is present in the query. 
+ * * @group basic * @since 1.3.0 */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 58b2ab3957173..e09e3caa3c981 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -155,9 +155,6 @@ class SparkSession private( /** * A collection of methods for registering user-defined functions (UDF). - * Note that the user-defined functions must be deterministic. Due to optimization, - * duplicate invocations may be eliminated or the function may even be invoked more times than - * it is present in the query. * * The following example registers a Scala closure as UDF: * {{{ @@ -182,6 +179,10 @@ class SparkSession private( * DataTypes.StringType); * }}} * + * @note The user-defined functions must be deterministic. Due to optimization, + * duplicate invocations may be eliminated or the function may even be invoked more times than + * it is present in the query. + * * @since 2.0.0 */ def udf: UDFRegistration = sessionState.udf @@ -201,7 +202,7 @@ class SparkSession private( * Start a new session with isolated SQL configurations, temporary tables, registered * functions are isolated, but sharing the underlying [[SparkContext]] and cached data. * - * Note: Other than the [[SparkContext]], all shared state is initialized lazily. + * @note Other than the [[SparkContext]], all shared state is initialized lazily. * This method will force the initialization of the shared state to ensure that parent * and child sessions are set up with the same shared state. If the underlying catalog * implementation is Hive, this will initialize the metastore, which may take some time. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 0444ad10d34fb..6043c5ee14b54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -39,7 +39,8 @@ import org.apache.spark.util.Utils /** * Functions for registering user-defined functions. Use [[SQLContext.udf]] to access this. - * Note that the user-defined functions must be deterministic. + * + * @note The user-defined functions must be deterministic. * * @since 1.3.0 */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala index 4914a9d722a83..1b56c08f729c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala @@ -28,7 +28,7 @@ package object state { implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) { - /** Map each partition of a RDD along with data in a [[StateStore]]. */ + /** Map each partition of an RDD along with data in a [[StateStore]]. */ def mapPartitionsWithStateStore[U: ClassTag]( sqlContext: SQLContext, checkpointLocation: String, @@ -49,7 +49,7 @@ package object state { storeUpdateFunction) } - /** Map each partition of a RDD along with data in a [[StateStore]]. */ + /** Map each partition of an RDD along with data in a [[StateStore]]. 
*/ private[streaming] def mapPartitionsWithStateStore[U: ClassTag]( checkpointLocation: String, operatorId: Long, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 28598af781653..36dd5f78ac137 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -25,9 +25,7 @@ import org.apache.spark.sql.types.DataType /** * A user-defined function. To create one, use the `udf` functions in [[functions]]. - * Note that the user-defined functions must be deterministic. Due to optimization, - * duplicate invocations may be eliminated or the function may even be invoked more times than - * it is present in the query. + * * As an example: * {{{ * // Defined a UDF that returns true or false based on some numeric score. @@ -37,6 +35,10 @@ import org.apache.spark.sql.types.DataType * df.select( predict(df("score")) ) * }}} * + * @note The user-defined functions must be deterministic. Due to optimization, + * duplicate invocations may be eliminated or the function may even be invoked more times than + * it is present in the query. + * * @since 1.3.0 */ @InterfaceStability.Stable diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index e221c032b82f6..d5940c638acdb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -476,7 +476,7 @@ object functions { * * (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) * - * Note: the list of columns should match with grouping columns exactly, or empty (means all the + * @note The list of columns should match with grouping columns exactly, or empty (means all the * grouping columns). * * @group agg_funcs @@ -489,7 +489,7 @@ object functions { * * (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) * - * Note: the list of columns should match with grouping columns exactly. + * @note The list of columns should match with grouping columns exactly. * * @group agg_funcs * @since 2.0.0 @@ -1120,7 +1120,7 @@ object functions { * Generate a random column with independent and identically distributed (i.i.d.) samples * from U[0.0, 1.0]. * - * Note that this is indeterministic when data partitions are not fixed. + * @note This is indeterministic when data partitions are not fixed. * * @group normal_funcs * @since 1.4.0 @@ -1140,7 +1140,7 @@ object functions { * Generate a column with independent and identically distributed (i.i.d.) samples from * the standard normal distribution. * - * Note that this is indeterministic when data partitions are not fixed. + * @note This is indeterministic when data partitions are not fixed. * * @group normal_funcs * @since 1.4.0 @@ -1159,7 +1159,7 @@ object functions { /** * Partition ID. * - * Note that this is indeterministic because it depends on data partitioning and task scheduling. + * @note This is indeterministic because it depends on data partitioning and task scheduling. * * @group normal_funcs * @since 1.6.0 @@ -2207,7 +2207,7 @@ object functions { * Locate the position of the first occurrence of substr column in the given string. * Returns null if either of the arguments are null. * - * NOTE: The position is not zero based, but 1 based index. 
Returns 0 if substr + * @note The position is not zero based, but 1 based index. Returns 0 if substr * could not be found in str. * * @group string_funcs @@ -2242,7 +2242,8 @@ object functions { /** * Locate the position of the first occurrence of substr. - * NOTE: The position is not zero based, but 1 based index. Returns 0 if substr + * + * @note The position is not zero based, but 1 based index. Returns 0 if substr * could not be found in str. * * @group string_funcs @@ -2255,7 +2256,7 @@ object functions { /** * Locate the position of the first occurrence of substr in a string column, after position pos. * - * NOTE: The position is not zero based, but 1 based index. returns 0 if substr + * @note The position is not zero based, but 1 based index. returns 0 if substr * could not be found in str. * * @group string_funcs @@ -2369,7 +2370,8 @@ object functions { /** * Splits str around pattern (pattern is a regular expression). - * NOTE: pattern is a string representation of the regular expression. + * + * @note Pattern is a string representation of the regular expression. * * @group string_funcs * @since 1.5.0 @@ -2468,7 +2470,7 @@ object functions { * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All * pattern letters of [[java.text.SimpleDateFormat]] can be used. * - * NOTE: Use when ever possible specialized functions like [[year]]. These benefit from a + * @note Use when ever possible specialized functions like [[year]]. These benefit from a * specialized implementation. * * @group datetime_funcs diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index dec316be7aea1..7c64e28d24724 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -140,7 +140,7 @@ abstract class JdbcDialect extends Serializable { * tried in reverse order. A user-added dialect will thus be applied first, * overwriting the defaults. * - * Note that all new dialects are applied to new jdbc DataFrames only. Make + * @note All new dialects are applied to new jdbc DataFrames only. Make * sure to register your dialects first. */ @DeveloperApi diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index 15a48072525b2..ff6dd8cb0cf92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -69,7 +69,8 @@ trait DataSourceRegister { trait RelationProvider { /** * Returns a new base relation with the given parameters. - * Note: the parameters' keywords are case insensitive and this insensitivity is enforced + * + * @note The parameters' keywords are case insensitive and this insensitivity is enforced * by the Map that is passed to the function. */ def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation @@ -99,7 +100,8 @@ trait RelationProvider { trait SchemaRelationProvider { /** * Returns a new base relation with the given parameters and user defined schema. - * Note: the parameters' keywords are case insensitive and this insensitivity is enforced + * + * @note The parameters' keywords are case insensitive and this insensitivity is enforced * by the Map that is passed to the function. 
*/ def createRelation( @@ -205,7 +207,7 @@ abstract class BaseRelation { * large to broadcast. This method will be called multiple times during query planning * and thus should not perform expensive operations for each invocation. * - * Note that it is always better to overestimate size than underestimate, because underestimation + * @note It is always better to overestimate size than underestimate, because underestimation * could lead to execution plans that are suboptimal (i.e. broadcasting a very large table). * * @since 1.3.0 @@ -219,7 +221,7 @@ abstract class BaseRelation { * * If `needConversion` is `false`, buildScan() should return an [[RDD]] of [[InternalRow]] * - * Note: The internal representation is not stable across releases and thus data sources outside + * @note The internal representation is not stable across releases and thus data sources outside * of Spark SQL should leave this as true. * * @since 1.4.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala index 5e93fc469a41f..4504582187b97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.QueryExecution * :: Experimental :: * The interface of query execution listener that can be used to analyze execution metrics. * - * Note that implementations should guarantee thread-safety as they can be invoked by + * @note Implementations should guarantee thread-safety as they can be invoked by * multiple different threads. */ @Experimental @@ -39,24 +39,26 @@ trait QueryExecutionListener { /** * A callback function that will be called when a query executed successfully. - * Note that this can be invoked by multiple different threads. * * @param funcName name of the action that triggered this query. * @param qe the QueryExecution object that carries detail information like logical plan, * physical plan, etc. * @param durationNs the execution time for this query in nanoseconds. + * + * @note This can be invoked by multiple different threads. */ @DeveloperApi def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit /** * A callback function that will be called when a query execution failed. - * Note that this can be invoked by multiple different threads. * * @param funcName the name of the action that triggered this query. * @param qe the QueryExecution object that carries detail information like logical plan, * physical plan, etc. * @param exception the exception that failed this query. + * + * @note This can be invoked by multiple different threads. 
*/ @DeveloperApi def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 0daa29b666f62..b272c8e7d79c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -157,7 +157,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { val allColumns = fields.map(_.name).mkString(",") val schema = StructType(fields) - // Create a RDD for the schema + // Create an RDD for the schema val rdd = sparkContext.parallelize((1 to 10000), 10).map { i => Row( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 4808d0fcbc6cc..444261da8de6a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -421,11 +421,11 @@ class StreamingContext private[streaming] ( * by "moving" them from another location within the same file system. File names * starting with . are ignored. * - * '''Note:''' We ensure that the byte array for each record in the - * resulting RDDs of the DStream has the provided record length. - * * @param directory HDFS directory to monitor for new file * @param recordLength length of each record in bytes + * + * @note We ensure that the byte array for each record in the + * resulting RDDs of the DStream has the provided record length. */ def binaryRecordsStream( directory: String, @@ -447,12 +447,12 @@ class StreamingContext private[streaming] ( * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * - * NOTE: Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of - * those RDDs, so `queueStream` doesn't support checkpointing. - * * @param queue Queue of RDDs. Modifications to this data structure must be synchronized. * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval * @tparam T Type of objects in the RDD + * + * @note Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of + * those RDDs, so `queueStream` doesn't support checkpointing. */ def queueStream[T: ClassTag]( queue: Queue[RDD[T]], @@ -465,14 +465,14 @@ class StreamingContext private[streaming] ( * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * - * NOTE: Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of - * those RDDs, so `queueStream` doesn't support checkpointing. - * * @param queue Queue of RDDs. Modifications to this data structure must be synchronized. * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval * @param defaultRDD Default RDD is returned by the DStream when the queue is empty. * Set as null if no RDD should be returned when empty * @tparam T Type of objects in the RDD + * + * @note Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of + * those RDDs, so `queueStream` doesn't support checkpointing. 
*/ def queueStream[T: ClassTag]( queue: Queue[RDD[T]], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index da9ff858853cf..aa4003c62e1e7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -74,7 +74,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( */ def repartition(numPartitions: Int): JavaPairDStream[K, V] = dstream.repartition(numPartitions) - /** Method that generates a RDD for the given Duration */ + /** Method that generates an RDD for the given Duration */ def compute(validTime: Time): JavaPairRDD[K, V] = { dstream.compute(validTime) match { case Some(rdd) => new JavaPairRDD(rdd) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index 4c4376a089f59..b43b9405def97 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -218,11 +218,11 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * for new files and reads them as flat binary files with fixed record lengths, * yielding byte arrays * - * '''Note:''' We ensure that the byte array for each record in the - * resulting RDDs of the DStream has the provided record length. - * * @param directory HDFS directory to monitor for new files * @param recordLength The length at which to split the records + * + * @note We ensure that the byte array for each record in the + * resulting RDDs of the DStream has the provided record length. */ def binaryRecordsStream(directory: String, recordLength: Int): JavaDStream[Array[Byte]] = { ssc.binaryRecordsStream(directory, recordLength) @@ -352,13 +352,13 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * - * NOTE: + * @param queue Queue of RDDs + * @tparam T Type of objects in the RDD + * + * @note * 1. Changes to the queue after the stream is created will not be recognized. * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of * those RDDs, so `queueStream` doesn't support checkpointing. - * - * @param queue Queue of RDDs - * @tparam T Type of objects in the RDD */ def queueStream[T](queue: java.util.Queue[JavaRDD[T]]): JavaDStream[T] = { implicit val cm: ClassTag[T] = @@ -372,14 +372,14 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * - * NOTE: - * 1. Changes to the queue after the stream is created will not be recognized. - * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of - * those RDDs, so `queueStream` doesn't support checkpointing. - * * @param queue Queue of RDDs * @param oneAtATime Whether only one RDD should be consumed from the queue in every interval * @tparam T Type of objects in the RDD + * + * @note + * 1. Changes to the queue after the stream is created will not be recognized. + * 2. 
Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of + * those RDDs, so `queueStream` doesn't support checkpointing. */ def queueStream[T]( queue: java.util.Queue[JavaRDD[T]], @@ -396,7 +396,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * Create an input stream from a queue of RDDs. In each batch, * it will process either one or all of the RDDs returned by the queue. * - * NOTE: + * @note * 1. Changes to the queue after the stream is created will not be recognized. * 2. Arbitrary RDDs can be added to `queueStream`, there is no way to recover data of * those RDDs, so `queueStream` doesn't support checkpointing. @@ -454,9 +454,10 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { /** * Create a new DStream in which each RDD is generated by applying a function on RDDs of * the DStreams. The order of the JavaRDDs in the transform function parameter will be the - * same as the order of corresponding DStreams in the list. Note that for adding a - * JavaPairDStream in the list of JavaDStreams, convert it to a JavaDStream using - * [[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream(). + * same as the order of corresponding DStreams in the list. + * + * @note For adding a JavaPairDStream in the list of JavaDStreams, convert it to a + * JavaDStream using [[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream(). * In the transform function, convert the JavaRDD corresponding to that JavaDStream to * a JavaPairRDD using org.apache.spark.api.java.JavaPairRDD.fromJavaRDD(). */ @@ -476,9 +477,10 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { /** * Create a new DStream in which each RDD is generated by applying a function on RDDs of * the DStreams. The order of the JavaRDDs in the transform function parameter will be the - * same as the order of corresponding DStreams in the list. Note that for adding a - * JavaPairDStream in the list of JavaDStreams, convert it to a JavaDStream using - * [[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream(). + * same as the order of corresponding DStreams in the list. + * + * @note For adding a JavaPairDStream in the list of JavaDStreams, convert it to + * a JavaDStream using [[org.apache.spark.streaming.api.java.JavaPairDStream]].toJavaDStream(). * In the transform function, convert the JavaRDD corresponding to that JavaDStream to * a JavaPairRDD using org.apache.spark.api.java.JavaPairRDD.fromJavaRDD(). 
*/ diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index fa15a0bf65ab9..938a7fac1af41 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -68,13 +68,13 @@ abstract class DStream[T: ClassTag] ( // Methods that should be implemented by subclasses of DStream // ======================================================================= - /** Time interval after which the DStream generates a RDD */ + /** Time interval after which the DStream generates an RDD */ def slideDuration: Duration /** List of parent DStreams on which this DStream depends on */ def dependencies: List[DStream[_]] - /** Method that generates a RDD for the given time */ + /** Method that generates an RDD for the given time */ def compute(validTime: Time): Option[RDD[T]] // ======================================================================= diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala index ed08191f41cc8..9512db7d7d757 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala @@ -128,7 +128,7 @@ class InternalMapWithStateDStream[K: ClassTag, V: ClassTag, S: ClassTag, E: Clas super.initialize(time) } - /** Method that generates a RDD for the given time */ + /** Method that generates an RDD for the given time */ override def compute(validTime: Time): Option[RDD[MapWithStateRDDRecord[K, S, E]]] = { // Get the previous state or create a new empty state RDD val prevStateRDD = getOrCompute(validTime - slideDuration) match { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala index ce5a6e00fb2fe..a37fac87300b7 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala @@ -186,7 +186,7 @@ class WriteAheadLogBackedBlockRDDSuite assert(rdd.collect() === data.flatten) // Verify that the block fetching is skipped when isBlockValid is set to false. - // This is done by using a RDD whose data is only in memory but is set to skip block fetching + // This is done by using an RDD whose data is only in memory but is set to skip block fetching // Using that RDD will throw exception, as it skips block fetching even if the blocks are in // in BlockManager. if (testIsBlockValid) { From 30a6fbbb0fb47f5b74ceba3384f28a61bf4e4740 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 19 Nov 2016 11:28:25 +0000 Subject: [PATCH 0152/1204] [SPARK-18353][CORE] spark.rpc.askTimeout defalut value is not 120s ## What changes were proposed in this pull request? Avoid hard-coding spark.rpc.askTimeout to non-default in Client; fix doc about spark.rpc.askTimeout default ## How was this patch tested? Existing tests Author: Sean Owen Closes #15833 from srowen/SPARK-18353. 
(cherry picked from commit 8b1e1088eb274fb15260cd5d6d9508d42837a4d6) Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/Client.scala | 4 +++- docs/configuration.md | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index ee276e1b71138..a4de3d7eaf458 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -221,7 +221,9 @@ object Client { val conf = new SparkConf() val driverArgs = new ClientArguments(args) - conf.set("spark.rpc.askTimeout", "10") + if (!conf.contains("spark.rpc.askTimeout")) { + conf.set("spark.rpc.askTimeout", "10s") + } Logger.getRootLogger.setLevel(driverArgs.logLevel) val rpcEnv = diff --git a/docs/configuration.md b/docs/configuration.md index e0c661349caab..c2329b411fc69 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1175,7 +1175,7 @@ Apart from these, the following properties are also available, and may be useful spark.rpc.askTimeout - 120s + spark.network.timeout Duration for an RPC ask operation to wait before timing out. @@ -1531,7 +1531,7 @@ Apart from these, the following properties are also available, and may be useful spark.core.connection.ack.wait.timeout - 60s + spark.network.timeout How long for the connection to wait for ack to occur before timing out and giving up. To avoid unwilling timeout caused by long pause like GC, From 15ad3a319b91a8b495da9a0e6f5386417991d30d Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 19 Nov 2016 13:48:56 +0000 Subject: [PATCH 0153/1204] [SPARK-18448][CORE] Fix @since 2.1.0 on new SparkSession.close() method ## What changes were proposed in this pull request? Fix since 2.1.0 on new SparkSession.close() method. I goofed in https://github.com/apache/spark/pull/15932 because it was back-ported to 2.1 instead of just master as originally planned. Author: Sean Owen Closes #15938 from srowen/SPARK-18448.2. (cherry picked from commit ded5fefb6f5c0a97bf3d7fa1c0494dc434b6ee40) Signed-off-by: Sean Owen --- sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index e09e3caa3c981..71b1880dc0715 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -652,7 +652,7 @@ class SparkSession private( /** * Synonym for `stop()`. * - * @since 2.2.0 + * @since 2.1.0 */ override def close(): Unit = stop() From 15eb86c29c02178f4413df63c39b8df3cda30ca8 Mon Sep 17 00:00:00 2001 From: sethah Date: Sun, 20 Nov 2016 01:42:37 +0000 Subject: [PATCH 0154/1204] [SPARK-18456][ML][FOLLOWUP] Use matrix abstraction for coefficients in LogisticRegression training ## What changes were proposed in this pull request? This is a follow up to some of the discussion [here](https://github.com/apache/spark/pull/15593). During LogisticRegression training, we store the coefficients combined with intercepts as a flat vector, but a more natural abstraction is a matrix. Here, we refactor the code to use matrix where possible, which makes the code more readable and greatly simplifies the indexing. Note: We do not use a Breeze matrix for the cost function as was mentioned in the linked PR. 
This is because LBFGS/OWLQN require an implicit `MutableInnerProductModule[DenseMatrix[Double], Double]` which is not natively defined in Breeze. We would need to extend Breeze in Spark to define it ourselves. Also, we do not modify the `regParamL1Fun` because OWLQN in Breeze requires a `MutableEnumeratedCoordinateField[(Int, Int), DenseVector[Double]]` (since we still use a dense vector for coefficients). Here again we would have to extend Breeze inside Spark. ## How was this patch tested? This is internal code refactoring - the current unit tests passing show us that the change did not break anything. No added functionality in this patch. Author: sethah Closes #15893 from sethah/logreg_refactor. (cherry picked from commit 856e0042007c789dda4539fb19a5d4580999fbf4) Signed-off-by: DB Tsai --- .../classification/LogisticRegression.scala | 115 ++++++++---------- 1 file changed, 53 insertions(+), 62 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 71a7fe53c15f8..f58efd36a1c66 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -463,16 +463,11 @@ class LogisticRegression @Since("1.2.0") ( } /* - The coefficients are laid out in column major order during training. e.g. for - `numClasses = 3` and `numFeatures = 2` and `fitIntercept = true` the layout is: - - Array(beta_11, beta_21, beta_31, beta_12, beta_22, beta_32, intercept_1, intercept_2, - intercept_3) - - where beta_jk corresponds to the coefficient for class `j` and feature `k`. + The coefficients are laid out in column major order during training. Here we initialize + a column major matrix of initial coefficients. 
*/ - val initialCoefficientsWithIntercept = - Vectors.zeros(numCoefficientSets * numFeaturesPlusIntercept) + val initialCoefWithInterceptMatrix = + Matrices.zeros(numCoefficientSets, numFeaturesPlusIntercept) val initialModelIsValid = optInitialModel match { case Some(_initialModel) => @@ -491,18 +486,15 @@ class LogisticRegression @Since("1.2.0") ( } if (initialModelIsValid) { - val initialCoefWithInterceptArray = initialCoefficientsWithIntercept.toArray val providedCoef = optInitialModel.get.coefficientMatrix - providedCoef.foreachActive { (row, col, value) => - // convert matrix to column major for training - val flatIndex = col * numCoefficientSets + row + providedCoef.foreachActive { (classIndex, featureIndex, value) => // We need to scale the coefficients since they will be trained in the scaled space - initialCoefWithInterceptArray(flatIndex) = value * featuresStd(col) + initialCoefWithInterceptMatrix.update(classIndex, featureIndex, + value * featuresStd(featureIndex)) } if ($(fitIntercept)) { - optInitialModel.get.interceptVector.foreachActive { (index, value) => - val coefIndex = numCoefficientSets * numFeatures + index - initialCoefWithInterceptArray(coefIndex) = value + optInitialModel.get.interceptVector.foreachActive { (classIndex, value) => + initialCoefWithInterceptMatrix.update(classIndex, numFeatures, value) } } } else if ($(fitIntercept) && isMultinomial) { @@ -532,8 +524,7 @@ class LogisticRegression @Since("1.2.0") ( val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing val rawMean = rawIntercepts.sum / rawIntercepts.length rawIntercepts.indices.foreach { i => - initialCoefficientsWithIntercept.toArray(numClasses * numFeatures + i) = - rawIntercepts(i) - rawMean + initialCoefWithInterceptMatrix.update(i, numFeatures, rawIntercepts(i) - rawMean) } } else if ($(fitIntercept)) { /* @@ -549,12 +540,12 @@ class LogisticRegression @Since("1.2.0") ( b = \log{P(1) / P(0)} = \log{count_1 / count_0} }}} */ - initialCoefficientsWithIntercept.toArray(numFeatures) = math.log( - histogram(1) / histogram(0)) + initialCoefWithInterceptMatrix.update(0, numFeatures, + math.log(histogram(1) / histogram(0))) } val states = optimizer.iterations(new CachedDiffFunction(costFun), - initialCoefficientsWithIntercept.asBreeze.toDenseVector) + new BDV[Double](initialCoefWithInterceptMatrix.toArray)) /* Note that in Logistic Regression, the objective history (loss + regularization) @@ -586,15 +577,24 @@ class LogisticRegression @Since("1.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. 
*/ - val rawCoefficients = state.x.toArray.clone() - val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i => - val colMajorIndex = (i % numFeatures) * numCoefficientSets + i / numFeatures - val featureIndex = i % numFeatures - if (featuresStd(featureIndex) != 0.0) { - rawCoefficients(colMajorIndex) / featuresStd(featureIndex) - } else { - 0.0 + val allCoefficients = state.x.toArray.clone() + val allCoefMatrix = new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept, + allCoefficients) + val denseCoefficientMatrix = new DenseMatrix(numCoefficientSets, numFeatures, + new Array[Double](numCoefficientSets * numFeatures), isTransposed = true) + val interceptVec = if ($(fitIntercept) || !isMultinomial) { + Vectors.zeros(numCoefficientSets) + } else { + Vectors.sparse(numCoefficientSets, Seq()) + } + // separate intercepts and coefficients from the combined matrix + allCoefMatrix.foreachActive { (classIndex, featureIndex, value) => + val isIntercept = $(fitIntercept) && (featureIndex == numFeatures) + if (!isIntercept && featuresStd(featureIndex) != 0.0) { + denseCoefficientMatrix.update(classIndex, featureIndex, + value / featuresStd(featureIndex)) } + if (isIntercept) interceptVec.toArray(classIndex) = value } if ($(regParam) == 0.0 && isMultinomial) { @@ -607,17 +607,16 @@ class LogisticRegression @Since("1.2.0") ( Friedman, et al. "Regularization Paths for Generalized Linear Models via Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf */ - val coefficientMean = coefficientArray.sum / coefficientArray.length - coefficientArray.indices.foreach { i => coefficientArray(i) -= coefficientMean} + val denseValues = denseCoefficientMatrix.values + val coefficientMean = denseValues.sum / denseValues.length + denseCoefficientMatrix.update(_ - coefficientMean) } - val denseCoefficientMatrix = - new DenseMatrix(numCoefficientSets, numFeatures, coefficientArray, isTransposed = true) // TODO: use `denseCoefficientMatrix.compressed` after SPARK-17471 val compressedCoefficientMatrix = if (isMultinomial) { denseCoefficientMatrix } else { - val compressedVector = Vectors.dense(coefficientArray).compressed + val compressedVector = Vectors.dense(denseCoefficientMatrix.values).compressed compressedVector match { case dv: DenseVector => denseCoefficientMatrix case sv: SparseVector => @@ -626,25 +625,13 @@ class LogisticRegression @Since("1.2.0") ( } } - val interceptsArray: Array[Double] = if ($(fitIntercept)) { - Array.tabulate(numCoefficientSets) { i => - val coefIndex = numFeatures * numCoefficientSets + i - rawCoefficients(coefIndex) - } - } else { - Array.empty[Double] - } - val interceptVector = if (interceptsArray.nonEmpty && isMultinomial) { - // The intercepts are never regularized, so we always center the mean. 
- val interceptMean = interceptsArray.sum / numClasses - interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean } - Vectors.dense(interceptsArray) - } else if (interceptsArray.length == 1) { - Vectors.dense(interceptsArray) - } else { - Vectors.sparse(numCoefficientSets, Seq()) + // center the intercepts when using multinomial algorithm + if ($(fitIntercept) && isMultinomial) { + val interceptArray = interceptVec.toArray + val interceptMean = interceptArray.sum / interceptArray.length + (0 until interceptVec.size).foreach { i => interceptArray(i) -= interceptMean } } - (compressedCoefficientMatrix, interceptVector.compressed, arrayBuilder.result()) + (compressedCoefficientMatrix, interceptVec.compressed, arrayBuilder.result()) } } @@ -1424,6 +1411,7 @@ private class LogisticAggregator( private val numFeatures = bcFeaturesStd.value.length private val numFeaturesPlusIntercept = if (fitIntercept) numFeatures + 1 else numFeatures private val coefficientSize = bcCoefficients.value.size + private val numCoefficientSets = if (multinomial) numClasses else 1 if (multinomial) { require(numClasses == coefficientSize / numFeaturesPlusIntercept, s"The number of " + s"coefficients should be ${numClasses * numFeaturesPlusIntercept} but was $coefficientSize") @@ -1633,12 +1621,12 @@ private class LogisticAggregator( lossSum / weightSum } - def gradient: Vector = { + def gradient: Matrix = { require(weightSum > 0.0, s"The effective number of instances should be " + s"greater than 0.0, but $weightSum.") val result = Vectors.dense(gradientSumArray.clone()) scal(1.0 / weightSum, result) - result + new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept, result.toArray) } } @@ -1664,6 +1652,7 @@ private class LogisticCostFun( val featuresStd = bcFeaturesStd.value val numFeatures = featuresStd.length val numCoefficientSets = if (multinomial) numClasses else 1 + val numFeaturesPlusIntercept = if (fitIntercept) numFeatures + 1 else numFeatures val logisticAggregator = { val seqOp = (c: LogisticAggregator, instance: Instance) => c.add(instance) @@ -1675,24 +1664,25 @@ private class LogisticCostFun( )(seqOp, combOp, aggregationDepth) } - val totalGradientArray = logisticAggregator.gradient.toArray + val totalGradientMatrix = logisticAggregator.gradient + val coefMatrix = new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept, coeffs.toArray) // regVal is the sum of coefficients squares excluding intercept for L2 regularization. val regVal = if (regParamL2 == 0.0) { 0.0 } else { var sum = 0.0 - coeffs.foreachActive { case (index, value) => + coefMatrix.foreachActive { case (classIndex, featureIndex, value) => // We do not apply regularization to the intercepts - val isIntercept = fitIntercept && index >= numCoefficientSets * numFeatures + val isIntercept = fitIntercept && (featureIndex == numFeatures) if (!isIntercept) { // The following code will compute the loss of the regularization; also // the gradient of the regularization, and add back to totalGradientArray. 
sum += { if (standardization) { - totalGradientArray(index) += regParamL2 * value + val gradValue = totalGradientMatrix(classIndex, featureIndex) + totalGradientMatrix.update(classIndex, featureIndex, gradValue + regParamL2 * value) value * value } else { - val featureIndex = index / numCoefficientSets if (featuresStd(featureIndex) != 0.0) { // If `standardization` is false, we still standardize the data // to improve the rate of convergence; as a result, we have to @@ -1700,7 +1690,8 @@ private class LogisticCostFun( // differently to get effectively the same objective function when // the training dataset is not standardized. val temp = value / (featuresStd(featureIndex) * featuresStd(featureIndex)) - totalGradientArray(index) += regParamL2 * temp + val gradValue = totalGradientMatrix(classIndex, featureIndex) + totalGradientMatrix.update(classIndex, featureIndex, gradValue + regParamL2 * temp) value * temp } else { 0.0 @@ -1713,6 +1704,6 @@ private class LogisticCostFun( } bcCoeffs.destroy(blocking = false) - (logisticAggregator.loss + regVal, new BDV(totalGradientArray)) + (logisticAggregator.loss + regVal, new BDV(totalGradientMatrix.toArray)) } } From b0b2f10817f38d9cebd2e436a07d4dd3e41e9328 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sat, 19 Nov 2016 21:50:20 -0800 Subject: [PATCH 0155/1204] [SPARK-18458][CORE] Fix signed integer overflow problem at an expression in RadixSort.java ## What changes were proposed in this pull request? This PR avoids that a result of an expression is negative due to signed integer overflow (e.g. 0x10?????? * 8 < 0). This PR casts each operand to `long` before executing a calculation. Since the result is interpreted as long, the result of the expression is positive. ## How was this patch tested? Manually executed query82 of TPC-DS with 100TB Author: Kazuaki Ishizaki Closes #15907 from kiszk/SPARK-18458. (cherry picked from commit d93b6552473468df297a08c0bef9ea0bf0f5c13a) Signed-off-by: Reynold Xin --- .../collection/unsafe/sort/RadixSort.java | 48 ++++++++++--------- .../unsafe/sort/UnsafeInMemorySorter.java | 2 +- .../unsafe/sort/RadixSortSuite.scala | 28 +++++------ 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RadixSort.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RadixSort.java index 404361734a55b..3dd318471008b 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RadixSort.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/RadixSort.java @@ -17,6 +17,8 @@ package org.apache.spark.util.collection.unsafe.sort; +import com.google.common.primitives.Ints; + import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.array.LongArray; @@ -40,14 +42,14 @@ public class RadixSort { * of always copying the data back to position zero for efficiency. 
*/ public static int sort( - LongArray array, int numRecords, int startByteIndex, int endByteIndex, + LongArray array, long numRecords, int startByteIndex, int endByteIndex, boolean desc, boolean signed) { assert startByteIndex >= 0 : "startByteIndex (" + startByteIndex + ") should >= 0"; assert endByteIndex <= 7 : "endByteIndex (" + endByteIndex + ") should <= 7"; assert endByteIndex > startByteIndex; assert numRecords * 2 <= array.size(); - int inIndex = 0; - int outIndex = numRecords; + long inIndex = 0; + long outIndex = numRecords; if (numRecords > 0) { long[][] counts = getCounts(array, numRecords, startByteIndex, endByteIndex); for (int i = startByteIndex; i <= endByteIndex; i++) { @@ -55,13 +57,13 @@ public static int sort( sortAtByte( array, numRecords, counts[i], i, inIndex, outIndex, desc, signed && i == endByteIndex); - int tmp = inIndex; + long tmp = inIndex; inIndex = outIndex; outIndex = tmp; } } } - return inIndex; + return Ints.checkedCast(inIndex); } /** @@ -78,14 +80,14 @@ public static int sort( * @param signed whether this is a signed (two's complement) sort (only applies to last byte). */ private static void sortAtByte( - LongArray array, int numRecords, long[] counts, int byteIdx, int inIndex, int outIndex, + LongArray array, long numRecords, long[] counts, int byteIdx, long inIndex, long outIndex, boolean desc, boolean signed) { assert counts.length == 256; long[] offsets = transformCountsToOffsets( - counts, numRecords, array.getBaseOffset() + outIndex * 8, 8, desc, signed); + counts, numRecords, array.getBaseOffset() + outIndex * 8L, 8, desc, signed); Object baseObject = array.getBaseObject(); - long baseOffset = array.getBaseOffset() + inIndex * 8; - long maxOffset = baseOffset + numRecords * 8; + long baseOffset = array.getBaseOffset() + inIndex * 8L; + long maxOffset = baseOffset + numRecords * 8L; for (long offset = baseOffset; offset < maxOffset; offset += 8) { long value = Platform.getLong(baseObject, offset); int bucket = (int)((value >>> (byteIdx * 8)) & 0xff); @@ -106,13 +108,13 @@ private static void sortAtByte( * significant byte. If the byte does not need sorting the array will be null. */ private static long[][] getCounts( - LongArray array, int numRecords, int startByteIndex, int endByteIndex) { + LongArray array, long numRecords, int startByteIndex, int endByteIndex) { long[][] counts = new long[8][]; // Optimization: do a fast pre-pass to determine which byte indices we can skip for sorting. // If all the byte values at a particular index are the same we don't need to count it. long bitwiseMax = 0; long bitwiseMin = -1L; - long maxOffset = array.getBaseOffset() + numRecords * 8; + long maxOffset = array.getBaseOffset() + numRecords * 8L; Object baseObject = array.getBaseObject(); for (long offset = array.getBaseOffset(); offset < maxOffset; offset += 8) { long value = Platform.getLong(baseObject, offset); @@ -146,18 +148,18 @@ private static long[][] getCounts( * @return the input counts array. */ private static long[] transformCountsToOffsets( - long[] counts, int numRecords, long outputOffset, int bytesPerRecord, + long[] counts, long numRecords, long outputOffset, long bytesPerRecord, boolean desc, boolean signed) { assert counts.length == 256; int start = signed ? 128 : 0; // output the negative records first (values 129-255). 
if (desc) { - int pos = numRecords; + long pos = numRecords; for (int i = start; i < start + 256; i++) { pos -= counts[i & 0xff]; counts[i & 0xff] = outputOffset + pos * bytesPerRecord; } } else { - int pos = 0; + long pos = 0; for (int i = start; i < start + 256; i++) { long tmp = counts[i & 0xff]; counts[i & 0xff] = outputOffset + pos * bytesPerRecord; @@ -176,8 +178,8 @@ private static long[] transformCountsToOffsets( */ public static int sortKeyPrefixArray( LongArray array, - int startIndex, - int numRecords, + long startIndex, + long numRecords, int startByteIndex, int endByteIndex, boolean desc, @@ -186,8 +188,8 @@ public static int sortKeyPrefixArray( assert endByteIndex <= 7 : "endByteIndex (" + endByteIndex + ") should <= 7"; assert endByteIndex > startByteIndex; assert numRecords * 4 <= array.size(); - int inIndex = startIndex; - int outIndex = startIndex + numRecords * 2; + long inIndex = startIndex; + long outIndex = startIndex + numRecords * 2L; if (numRecords > 0) { long[][] counts = getKeyPrefixArrayCounts( array, startIndex, numRecords, startByteIndex, endByteIndex); @@ -196,13 +198,13 @@ public static int sortKeyPrefixArray( sortKeyPrefixArrayAtByte( array, numRecords, counts[i], i, inIndex, outIndex, desc, signed && i == endByteIndex); - int tmp = inIndex; + long tmp = inIndex; inIndex = outIndex; outIndex = tmp; } } } - return inIndex; + return Ints.checkedCast(inIndex); } /** @@ -210,7 +212,7 @@ public static int sortKeyPrefixArray( * getCounts with some added parameters but that seems to hurt in benchmarks. */ private static long[][] getKeyPrefixArrayCounts( - LongArray array, int startIndex, int numRecords, int startByteIndex, int endByteIndex) { + LongArray array, long startIndex, long numRecords, int startByteIndex, int endByteIndex) { long[][] counts = new long[8][]; long bitwiseMax = 0; long bitwiseMin = -1L; @@ -238,11 +240,11 @@ private static long[][] getKeyPrefixArrayCounts( * Specialization of sortAtByte() for key-prefix arrays. 
*/ private static void sortKeyPrefixArrayAtByte( - LongArray array, int numRecords, long[] counts, int byteIdx, int inIndex, int outIndex, + LongArray array, long numRecords, long[] counts, int byteIdx, long inIndex, long outIndex, boolean desc, boolean signed) { assert counts.length == 256; long[] offsets = transformCountsToOffsets( - counts, numRecords, array.getBaseOffset() + outIndex * 8, 16, desc, signed); + counts, numRecords, array.getBaseOffset() + outIndex * 8L, 16, desc, signed); Object baseObject = array.getBaseObject(); long baseOffset = array.getBaseOffset() + inIndex * 8L; long maxOffset = baseOffset + numRecords * 16L; diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 2a71e68adafad..252a35ec6bdf5 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -322,7 +322,7 @@ public UnsafeSorterIterator getSortedIterator() { if (sortComparator != null) { if (this.radixSortSupport != null) { offset = RadixSort.sortKeyPrefixArray( - array, nullBoundaryPos, (pos - nullBoundaryPos) / 2, 0, 7, + array, nullBoundaryPos, (pos - nullBoundaryPos) / 2L, 0, 7, radixSortSupport.sortDescending(), radixSortSupport.sortSigned()); } else { MemoryBlock unused = new MemoryBlock( diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala index 366ffda7788d3..d5956ea32096a 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala @@ -22,6 +22,8 @@ import java.util.{Arrays, Comparator} import scala.util.Random +import com.google.common.primitives.Ints + import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.unsafe.array.LongArray @@ -30,7 +32,7 @@ import org.apache.spark.util.collection.Sorter import org.apache.spark.util.random.XORShiftRandom class RadixSortSuite extends SparkFunSuite with Logging { - private val N = 10000 // scale this down for more readable results + private val N = 10000L // scale this down for more readable results /** * Describes a type of sort to test, e.g. two's complement descending. 
Each sort type has @@ -73,22 +75,22 @@ class RadixSortSuite extends SparkFunSuite with Logging { }, 2, 4, false, false, true)) - private def generateTestData(size: Int, rand: => Long): (Array[JLong], LongArray) = { - val ref = Array.tabulate[Long](size) { i => rand } - val extended = ref ++ Array.fill[Long](size)(0) + private def generateTestData(size: Long, rand: => Long): (Array[JLong], LongArray) = { + val ref = Array.tabulate[Long](Ints.checkedCast(size)) { i => rand } + val extended = ref ++ Array.fill[Long](Ints.checkedCast(size))(0) (ref.map(i => new JLong(i)), new LongArray(MemoryBlock.fromLongArray(extended))) } - private def generateKeyPrefixTestData(size: Int, rand: => Long): (LongArray, LongArray) = { - val ref = Array.tabulate[Long](size * 2) { i => rand } - val extended = ref ++ Array.fill[Long](size * 2)(0) + private def generateKeyPrefixTestData(size: Long, rand: => Long): (LongArray, LongArray) = { + val ref = Array.tabulate[Long](Ints.checkedCast(size * 2)) { i => rand } + val extended = ref ++ Array.fill[Long](Ints.checkedCast(size * 2))(0) (new LongArray(MemoryBlock.fromLongArray(ref)), new LongArray(MemoryBlock.fromLongArray(extended))) } - private def collectToArray(array: LongArray, offset: Int, length: Int): Array[Long] = { + private def collectToArray(array: LongArray, offset: Int, length: Long): Array[Long] = { var i = 0 - val out = new Array[Long](length) + val out = new Array[Long](Ints.checkedCast(length)) while (i < length) { out(i) = array.get(offset + i) i += 1 @@ -107,15 +109,13 @@ class RadixSortSuite extends SparkFunSuite with Logging { } } - private def referenceKeyPrefixSort(buf: LongArray, lo: Int, hi: Int, refCmp: PrefixComparator) { + private def referenceKeyPrefixSort(buf: LongArray, lo: Long, hi: Long, refCmp: PrefixComparator) { val sortBuffer = new LongArray(MemoryBlock.fromLongArray(new Array[Long](buf.size().toInt))) new Sorter(new UnsafeSortDataFormat(sortBuffer)).sort( - buf, lo, hi, new Comparator[RecordPointerAndKeyPrefix] { + buf, Ints.checkedCast(lo), Ints.checkedCast(hi), new Comparator[RecordPointerAndKeyPrefix] { override def compare( r1: RecordPointerAndKeyPrefix, - r2: RecordPointerAndKeyPrefix): Int = { - refCmp.compare(r1.keyPrefix, r2.keyPrefix) - } + r2: RecordPointerAndKeyPrefix): Int = refCmp.compare(r1.keyPrefix, r2.keyPrefix) }) } From 94a9eed11a11510a91dc4c8adb793dc3cbdef8f5 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 19 Nov 2016 21:57:09 -0800 Subject: [PATCH 0156/1204] [SPARK-18508][SQL] Fix documentation error for DateDiff ## What changes were proposed in this pull request? The previous documentation and example for DateDiff was wrong. ## How was this patch tested? Doc only change. Author: Reynold Xin Closes #15937 from rxin/datediff-doc. 
(cherry picked from commit bce9a03677f931d52491e7768aba9e4a19a7e696) Signed-off-by: Reynold Xin --- .../sql/catalyst/expressions/datetimeExpressions.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9cec6be841de0..1db1d1995d942 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1101,11 +1101,14 @@ case class TruncDate(date: Expression, format: Expression) * Returns the number of days from startDate to endDate. */ @ExpressionDescription( - usage = "_FUNC_(date1, date2) - Returns the number of days between `date1` and `date2`.", + usage = "_FUNC_(endDate, startDate) - Returns the number of days from `startDate` to `endDate`.", extended = """ Examples: - > SELECT _FUNC_('2009-07-30', '2009-07-31'); + > SELECT _FUNC_('2009-07-31', '2009-07-30'); 1 + + > SELECT _FUNC_('2009-07-30', '2009-07-31'); + -1 """) case class DateDiff(endDate: Expression, startDate: Expression) extends BinaryExpression with ImplicitCastInputTypes { From 063da0c8d4e82a47cf7841578dcf968080c3d89d Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 19 Nov 2016 21:57:49 -0800 Subject: [PATCH 0157/1204] [SQL] Fix documentation for Concat and ConcatWs (cherry picked from commit a64f25d8b403b17ff68c9575f6f35b22e5b62427) Signed-off-by: Reynold Xin --- .../sql/catalyst/expressions/stringExpressions.scala | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index e74ef9a08750e..908aa44f81c97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -40,15 +40,13 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} * An expression that concatenates multiple input strings into a single string. * If any input is null, concat returns null. 
*/ -// scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(str1, str2, ..., strN) - Returns the concatenation of `str1`, `str2`, ..., `strN`.", + usage = "_FUNC_(str1, str2, ..., strN) - Returns the concatenation of str1, str2, ..., strN.", extended = """ Examples: - > SELECT _FUNC_('Spark','SQL'); + > SELECT _FUNC_('Spark', 'SQL'); SparkSQL """) -// scalastyle:on line.size.limit case class Concat(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType) @@ -89,8 +87,8 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas usage = "_FUNC_(sep, [str | array(str)]+) - Returns the concatenation of the strings separated by `sep`.", extended = """ Examples: - > SELECT _FUNC_(' ', Spark', 'SQL'); - Spark SQL + > SELECT _FUNC_(' ', 'Spark', 'SQL'); + Spark SQL """) // scalastyle:on line.size.limit case class ConcatWs(children: Seq[Expression]) From bc3e7b3b8a0dfc00d22bf5ee168f308a6ef5d78b Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 20 Nov 2016 09:52:03 +0000 Subject: [PATCH 0158/1204] [SPARK-3359][BUILD][DOCS] Print examples and disable group and tparam tags in javadoc ## What changes were proposed in this pull request? This PR proposes/fixes two things. - Remove many errors to generate javadoc with Java8 from unrecognisable tags, `tparam` and `group`. ``` [error] .../spark/mllib/target/java/org/apache/spark/ml/classification/Classifier.java:18: error: unknown tag: group [error] /** group setParam */ [error] ^ [error] .../spark/mllib/target/java/org/apache/spark/ml/classification/Classifier.java:8: error: unknown tag: tparam [error] * tparam FeaturesType Type of input features. E.g., Vector [error] ^ ... ``` It does not fully resolve the problem but remove many errors. It seems both `group` and `tparam` are unrecognisable in javadoc. It seems we can't print them pretty in javadoc in a way of `example` here because they appear differently (both examples can be found in http://spark.apache.org/docs/2.0.2/api/scala/index.html#org.apache.spark.ml.classification.Classifier). - Print `example` in javadoc. Currently, there are few `example` tag in several places. ``` ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example This operation might be used to evaluate a graph ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example We might use this operation to change the vertex values ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example This function might be used to initialize edge ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example This function might be used to initialize edge ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example This function might be used to initialize edge ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example We can use this function to compute the in-degree of each ./graphx/src/main/scala/org/apache/spark/graphx/Graph.scala: * example This function is used to update the vertices with new values based on external data. 
./graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala: * example Loads a file in the following format: ./graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala: * example This function is used to update the vertices with new ./graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala: * example This function can be used to filter the graph based on some property, without ./graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala: * example We can use the Pregel abstraction to implement PageRank: ./graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala: * example Construct a `VertexRDD` from a plain RDD: ./repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkCommandLine.scala: * example new SparkCommandLine(Nil).settings ./repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkIMain.scala: * example addImports("org.apache.spark.SparkContext") ./sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala: * example {{{ ``` **Before** 2016-11-20 2 43 23 **After** 2016-11-20 1 27 17 ## How was this patch tested? Maunally tested by `jekyll build` with Java 7 and 8 ``` java version "1.7.0_80" Java(TM) SE Runtime Environment (build 1.7.0_80-b15) Java HotSpot(TM) 64-Bit Server VM (build 24.80-b11, mixed mode) ``` ``` java version "1.8.0_45" Java(TM) SE Runtime Environment (build 1.8.0_45-b14) Java HotSpot(TM) 64-Bit Server VM (build 25.45-b02, mixed mode) ``` Note: this does not make sbt unidoc suceed with Java 8 yet but it reduces the number of errors with Java 8. Author: hyukjinkwon Closes #15939 from HyukjinKwon/SPARK-3359-javadoc. (cherry picked from commit c528812ce770fd8a6626e7f9d2f8ca9d1e84642b) Signed-off-by: Sean Owen --- pom.xml | 13 +++++++++++++ project/SparkBuild.scala | 5 ++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 024b2850d0a3d..7c0b0b59dc62b 100644 --- a/pom.xml +++ b/pom.xml @@ -2477,11 +2477,24 @@ -Xdoclint:all -Xdoclint:-missing + + example + a + Example: + note a Note: + + group + X + + + tparam + X + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 92b45657210e1..429a163d22a6d 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -742,7 +742,10 @@ object Unidoc { "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", "-public", "-noqualifier", "java.lang", - "-tag", """note:a:Note\:""" + "-tag", """example:a:Example\:""", + "-tag", """note:a:Note\:""", + "-tag", "group:X", + "-tag", "tparam:X" ), // Use GitHub repository for Scaladoc source links From cffaf5035816fa6ffc4dadd47bede1eff6371fee Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Sun, 20 Nov 2016 12:46:29 -0800 Subject: [PATCH 0159/1204] [SPARK-17732][SQL] Revert ALTER TABLE DROP PARTITION should support comparators This reverts commit 1126c3194ee1c79015cf1d3808bc963aa93dcadf. Author: Herman van Hovell Closes #15948 from hvanhovell/SPARK-17732. 
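After this revert, `ALTER TABLE ... DROP PARTITION` once again accepts only equality-based partition specs, and comparator filters are rejected by the parser. A minimal sketch of the surviving syntax (assuming an active `spark` session and a partitioned table named `sales`, as in the reverted test):

```scala
// Equality partition specs remain supported after the revert
spark.sql("ALTER TABLE sales DROP PARTITION (country = 'US', quarter = '4')")
// Comparator specs such as PARTITION (quarter > '2') are no longer parsed
```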
--- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +- .../sql/catalyst/parser/AstBuilder.scala | 30 +---- .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../spark/sql/execution/command/ddl.scala | 51 ++------- .../datasources/DataSourceStrategy.scala | 8 +- .../execution/command/DDLCommandSuite.scala | 9 +- .../sql/hive/execution/HiveDDLSuite.scala | 103 ------------------ 7 files changed, 24 insertions(+), 185 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index fcca11c69f0a3..b599a884957a8 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -239,7 +239,11 @@ partitionSpecLocation ; partitionSpec - : PARTITION '(' expression (',' expression)* ')' + : PARTITION '(' partitionVal (',' partitionVal)* ')' + ; + +partitionVal + : identifier (EQ constant)? ; describeFuncName diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 97056bba9d763..2006844923cf7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -194,15 +194,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { */ override def visitPartitionSpec( ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) { - val parts = ctx.expression.asScala.map { pVal => - expression(pVal) match { - case UnresolvedAttribute(name :: Nil) => - name -> None - case cmp @ EqualTo(UnresolvedAttribute(name :: Nil), constant: Literal) => - name -> Option(constant.toString) - case _ => - throw new ParseException("Invalid partition filter specification", ctx) - } + val parts = ctx.partitionVal.asScala.map { pVal => + val name = pVal.identifier.getText + val value = Option(pVal.constant).map(visitStringConstant) + name -> value } // Before calling `toMap`, we check duplicated keys to avoid silently ignore partition values // in partition spec like PARTITION(a='1', b='2', a='3'). The real semantical check for @@ -211,23 +206,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { parts.toMap } - /** - * Create a partition filter specification. - */ - def visitPartitionFilterSpec(ctx: PartitionSpecContext): Expression = withOrigin(ctx) { - val parts = ctx.expression.asScala.map { pVal => - expression(pVal) match { - case EqualNullSafe(_, _) => - throw new ParseException("'<=>' operator is not allowed in partition specification.", ctx) - case cmp @ BinaryComparison(UnresolvedAttribute(name :: Nil), constant: Literal) => - cmp.withNewChildren(Seq(AttributeReference(name, StringType)(), constant)) - case _ => - throw new ParseException("Invalid partition filter specification", ctx) - } - } - parts.reduceLeft(And) - } - /** * Create a partition specification map without optional values. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 112d812cb6c76..b8be3d17ba444 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -813,7 +813,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { } AlterTableDropPartitionCommand( visitTableIdentifier(ctx.tableIdentifier), - ctx.partitionSpec.asScala.map(visitPartitionFilterSpec), + ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec), ctx.EXISTS != null, ctx.PURGE != null) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 588aa05c37b49..570a9967871e9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -31,8 +31,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BinaryComparison} -import org.apache.spark.sql.catalyst.expressions.{EqualTo, Expression, PredicateHelper} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.types._ import org.apache.spark.util.SerializableConfiguration @@ -419,55 +418,27 @@ case class AlterTableRenamePartitionCommand( */ case class AlterTableDropPartitionCommand( tableName: TableIdentifier, - specs: Seq[Expression], + specs: Seq[TablePartitionSpec], ifExists: Boolean, purge: Boolean) - extends RunnableCommand with PredicateHelper { - - private def isRangeComparison(expr: Expression): Boolean = { - expr.find(e => e.isInstanceOf[BinaryComparison] && !e.isInstanceOf[EqualTo]).isDefined - } + extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog val table = catalog.getTableMetadata(tableName) - val resolver = sparkSession.sessionState.conf.resolver DDLUtils.verifyAlterTableType(catalog, table, isView = false) DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "ALTER TABLE DROP PARTITION") - specs.foreach { expr => - expr.references.foreach { attr => - if (!table.partitionColumnNames.exists(resolver(_, attr.name))) { - throw new AnalysisException(s"${attr.name} is not a valid partition column " + - s"in table ${table.identifier.quotedString}.") - } - } + val normalizedSpecs = specs.map { spec => + PartitioningUtils.normalizePartitionSpec( + spec, + table.partitionColumnNames, + table.identifier.quotedString, + sparkSession.sessionState.conf.resolver) } - if (specs.exists(isRangeComparison)) { - val partitionSet = specs.flatMap { spec => - val partitions = catalog.listPartitionsByFilter(table.identifier, Seq(spec)).map(_.spec) - if (partitions.isEmpty && !ifExists) { - throw new AnalysisException(s"There is no partition for ${spec.sql}") - } - partitions - }.distinct - catalog.dropPartitions( - table.identifier, partitionSet, ignoreIfNotExists = ifExists, purge = purge) - } else { - val normalizedSpecs = specs.map { expr => - val spec = 
splitConjunctivePredicates(expr).map { - case BinaryComparison(AttributeReference(name, _, _, _), right) => name -> right.toString - }.toMap - PartitioningUtils.normalizePartitionSpec( - spec, - table.partitionColumnNames, - table.identifier.quotedString, - resolver) - } - catalog.dropPartitions( - table.identifier, normalizedSpecs, ignoreIfNotExists = ifExists, purge = purge) - } + catalog.dropPartitions( + table.identifier, normalizedSpecs, ignoreIfNotExists = ifExists, purge = purge) Seq.empty[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index e81512d1abf84..4f19a2d00b0e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -215,14 +215,8 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] { if (overwrite.enabled) { val deletedPartitions = initialMatchingPartitions.toSet -- updatedPartitions if (deletedPartitions.nonEmpty) { - import org.apache.spark.sql.catalyst.expressions._ - val expressions = deletedPartitions.map { specs => - specs.map { case (key, value) => - EqualTo(AttributeReference(key, StringType)(), Literal.create(value, StringType)) - }.reduceLeft(And) - }.toSeq AlterTableDropPartitionCommand( - l.catalogTable.get.identifier, expressions, + l.catalogTable.get.identifier, deletedPartitions.toSeq, ifExists = true, purge = true).run(t.sparkSession) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 057528bef5084..d31e7aeb3a78a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -21,7 +21,6 @@ import scala.reflect.{classTag, ClassTag} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.Project @@ -613,12 +612,8 @@ class DDLCommandSuite extends PlanTest { val expected1_table = AlterTableDropPartitionCommand( tableIdent, Seq( - And( - EqualTo(AttributeReference("dt", StringType)(), Literal.create("2008-08-08", StringType)), - EqualTo(AttributeReference("country", StringType)(), Literal.create("us", StringType))), - And( - EqualTo(AttributeReference("dt", StringType)(), Literal.create("2009-09-09", StringType)), - EqualTo(AttributeReference("country", StringType)(), Literal.create("uk", StringType)))), + Map("dt" -> "2008-08-08", "country" -> "us"), + Map("dt" -> "2009-09-09", "country" -> "uk")), ifExists = true, purge = false) val expected2_table = expected1_table.copy(ifExists = false) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 15e3927b755af..951e0704148b3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ 
-26,7 +26,6 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -226,108 +225,6 @@ class HiveDDLSuite } } - test("SPARK-17732: Drop partitions by filter") { - withTable("sales") { - sql("CREATE TABLE sales(id INT) PARTITIONED BY (country STRING, quarter STRING)") - - for (country <- Seq("US", "CA", "KR")) { - for (quarter <- 1 to 4) { - sql(s"ALTER TABLE sales ADD PARTITION (country = '$country', quarter = '$quarter')") - } - } - - sql("ALTER TABLE sales DROP PARTITION (country < 'KR', quarter > '2')") - checkAnswer(sql("SHOW PARTITIONS sales"), - Row("country=CA/quarter=1") :: - Row("country=CA/quarter=2") :: - Row("country=KR/quarter=1") :: - Row("country=KR/quarter=2") :: - Row("country=KR/quarter=3") :: - Row("country=KR/quarter=4") :: - Row("country=US/quarter=1") :: - Row("country=US/quarter=2") :: - Row("country=US/quarter=3") :: - Row("country=US/quarter=4") :: Nil) - - sql("ALTER TABLE sales DROP PARTITION (country < 'KR'), PARTITION (quarter <= '1')") - checkAnswer(sql("SHOW PARTITIONS sales"), - Row("country=KR/quarter=2") :: - Row("country=KR/quarter=3") :: - Row("country=KR/quarter=4") :: - Row("country=US/quarter=2") :: - Row("country=US/quarter=3") :: - Row("country=US/quarter=4") :: Nil) - - sql("ALTER TABLE sales DROP PARTITION (country='KR', quarter='4')") - sql("ALTER TABLE sales DROP PARTITION (country='US', quarter='3')") - checkAnswer(sql("SHOW PARTITIONS sales"), - Row("country=KR/quarter=2") :: - Row("country=KR/quarter=3") :: - Row("country=US/quarter=2") :: - Row("country=US/quarter=4") :: Nil) - - sql("ALTER TABLE sales DROP PARTITION (quarter <= 2), PARTITION (quarter >= '4')") - checkAnswer(sql("SHOW PARTITIONS sales"), - Row("country=KR/quarter=3") :: Nil) - - // According to the declarative partition spec definitions, this drops the union of target - // partitions without exceptions. Hive raises exceptions because it handles them sequentially. 
- sql("ALTER TABLE sales DROP PARTITION (quarter <= 4), PARTITION (quarter <= '3')") - checkAnswer(sql("SHOW PARTITIONS sales"), Nil) - } - } - - test("SPARK-17732: Error handling for drop partitions by filter") { - withTable("sales") { - sql("CREATE TABLE sales(id INT) PARTITIONED BY (country STRING, quarter STRING)") - - val m = intercept[AnalysisException] { - sql("ALTER TABLE sales DROP PARTITION (unknown = 'KR')") - }.getMessage - assert(m.contains("unknown is not a valid partition column in table")) - - val m2 = intercept[AnalysisException] { - sql("ALTER TABLE sales DROP PARTITION (unknown < 'KR')") - }.getMessage - assert(m2.contains("unknown is not a valid partition column in table")) - - val m3 = intercept[AnalysisException] { - sql("ALTER TABLE sales DROP PARTITION (unknown <=> 'KR')") - }.getMessage - assert(m3.contains("'<=>' operator is not allowed in partition specification")) - - val m4 = intercept[ParseException] { - sql("ALTER TABLE sales DROP PARTITION (unknown <=> upper('KR'))") - }.getMessage - assert(m4.contains("'<=>' operator is not allowed in partition specification")) - - val m5 = intercept[ParseException] { - sql("ALTER TABLE sales DROP PARTITION (country < 'KR', quarter)") - }.getMessage - assert(m5.contains("Invalid partition filter specification")) - - sql(s"ALTER TABLE sales ADD PARTITION (country = 'KR', quarter = '3')") - val m6 = intercept[AnalysisException] { - sql("ALTER TABLE sales DROP PARTITION (quarter <= '4'), PARTITION (quarter <= '2')") - }.getMessage - // The query is not executed because `PARTITION (quarter <= '2')` is invalid. - checkAnswer(sql("SHOW PARTITIONS sales"), - Row("country=KR/quarter=3") :: Nil) - assert(m6.contains("There is no partition for (`quarter` <= '2')")) - } - } - - test("SPARK-17732: Partition filter is not allowed in ADD PARTITION") { - withTable("sales") { - sql("CREATE TABLE sales(id INT) PARTITIONED BY (country STRING, quarter STRING)") - - val m = intercept[ParseException] { - sql("ALTER TABLE sales ADD PARTITION (country = 'US', quarter < '1')") - }.getMessage() - assert(m.contains("Invalid partition filter specification")) - } - } - test("drop views") { withTable("tab1") { val tabName = "tab1" From f8662db72815b9c89f2448511d117e6d224e0b11 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 20 Nov 2016 20:00:59 -0800 Subject: [PATCH 0160/1204] [HOTFIX][SQL] Fix DDLSuite failure. 
(cherry picked from commit b625a36ebc59cbacc223fc03005bc0f6d296b6e7) Signed-off-by: Reynold Xin --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index a01073987423e..02d9d15684904 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1426,8 +1426,8 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { sql("DESCRIBE FUNCTION 'concat'"), Row("Class: org.apache.spark.sql.catalyst.expressions.Concat") :: Row("Function: concat") :: - Row("Usage: concat(str1, str2, ..., strN) " + - "- Returns the concatenation of `str1`, `str2`, ..., `strN`.") :: Nil + Row("Usage: concat(str1, str2, ..., strN) - " + + "Returns the concatenation of str1, str2, ..., strN.") :: Nil ) // extended mode checkAnswer( From fb4e6359d1fdb9e4f05fcfa03839024e8b91b47a Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 12:05:01 +0800 Subject: [PATCH 0161/1204] [SPARK-18467][SQL] Extracts method for preparing arguments from StaticInvoke, Invoke and NewInstance and modify to short circuit if arguments have null when `needNullCheck == true`. ## What changes were proposed in this pull request? This pr extracts method for preparing arguments from `StaticInvoke`, `Invoke` and `NewInstance` and modify to short circuit if arguments have `null` when `propageteNull == true`. The steps are as follows: 1. Introduce `InvokeLike` to extract common logic from `StaticInvoke`, `Invoke` and `NewInstance` to prepare arguments. `StaticInvoke` and `Invoke` had a risk to exceed 64kb JVM limit to prepare arguments but after this patch they can handle them because they share the preparing code of NewInstance, which handles the limit well. 2. Remove unneeded null checking and fix nullability of `NewInstance`. Avoid some of nullabilty checking which are not needed because the expression is not nullable. 3. Modify to short circuit if arguments have `null` when `needNullCheck == true`. If `needNullCheck == true`, preparing arguments can be skipped if we found one of them is `null`, so modified to short circuit in the case. ## How was this patch tested? Existing tests. Author: Takuya UESHIN Closes #15901 from ueshin/issues/SPARK-18467. (cherry picked from commit 658547974915ebcaae83e13e4c3bdf68d5426fda) Signed-off-by: Wenchen Fan --- .../expressions/objects/objects.scala | 163 +++++++++++------- 1 file changed, 101 insertions(+), 62 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 0e3d99127ed56..0b36091ece1bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -32,6 +32,78 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types._ +/** + * Common base class for [[StaticInvoke]], [[Invoke]], and [[NewInstance]]. 
+ */ +trait InvokeLike extends Expression with NonSQLExpression { + + def arguments: Seq[Expression] + + def propagateNull: Boolean + + protected lazy val needNullCheck: Boolean = propagateNull && arguments.exists(_.nullable) + + /** + * Prepares codes for arguments. + * + * - generate codes for argument. + * - use ctx.splitExpressions() to not exceed 64kb JVM limit while preparing arguments. + * - avoid some of nullabilty checking which are not needed because the expression is not + * nullable. + * - when needNullCheck == true, short circuit if we found one of arguments is null because + * preparing rest of arguments can be skipped in the case. + * + * @param ctx a [[CodegenContext]] + * @return (code to prepare arguments, argument string, result of argument null check) + */ + def prepareArguments(ctx: CodegenContext): (String, String, String) = { + + val resultIsNull = if (needNullCheck) { + val resultIsNull = ctx.freshName("resultIsNull") + ctx.addMutableState("boolean", resultIsNull, "") + resultIsNull + } else { + "false" + } + val argValues = arguments.map { e => + val argValue = ctx.freshName("argValue") + ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") + argValue + } + + val argCodes = if (needNullCheck) { + val reset = s"$resultIsNull = false;" + val argCodes = arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) + val updateResultIsNull = if (e.nullable) { + s"$resultIsNull = ${expr.isNull};" + } else { + "" + } + s""" + if (!$resultIsNull) { + ${expr.code} + $updateResultIsNull + ${argValues(i)} = ${expr.value}; + } + """ + } + reset +: argCodes + } else { + arguments.zipWithIndex.map { case (e, i) => + val expr = e.genCode(ctx) + s""" + ${expr.code} + ${argValues(i)} = ${expr.value}; + """ + } + } + val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) + + (argCode, argValues.mkString(", "), resultIsNull) + } +} + /** * Invokes a static function, returning the result. By default, any of the arguments being null * will result in returning null instead of calling the function. @@ -50,7 +122,7 @@ case class StaticInvoke( dataType: DataType, functionName: String, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends Expression with NonSQLExpression { + propagateNull: Boolean = true) extends InvokeLike { val objectName = staticObject.getName.stripSuffix("$") @@ -62,16 +134,10 @@ case class StaticInvoke( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val argGen = arguments.map(_.genCode(ctx)) - val argString = argGen.map(_.value).mkString(", ") - val callFunc = s"$objectName.$functionName($argString)" + val (argCode, argString, resultIsNull) = prepareArguments(ctx) - val setIsNull = if (propagateNull && arguments.nonEmpty) { - s"boolean ${ev.isNull} = ${argGen.map(_.isNull).mkString(" || ")};" - } else { - s"boolean ${ev.isNull} = false;" - } + val callFunc = s"$objectName.$functionName($argString)" // If the function can return null, we do an extra check to make sure our null bit is still set // correctly. @@ -82,9 +148,9 @@ case class StaticInvoke( } val code = s""" - ${argGen.map(_.code).mkString("\n")} - $setIsNull - final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : $callFunc; + $argCode + boolean ${ev.isNull} = $resultIsNull; + final $javaType ${ev.value} = $resultIsNull ? 
${ctx.defaultValue(dataType)} : $callFunc; $postNullCheck """ ev.copy(code = code) @@ -103,13 +169,15 @@ case class StaticInvoke( * @param functionName The name of the method to call. * @param dataType The expected return type of the function. * @param arguments An optional list of expressions, whos evaluation will be passed to the function. + * @param propagateNull When true, and any of the arguments is null, null will be returned instead + * of calling the function. */ case class Invoke( targetObject: Expression, functionName: String, dataType: DataType, arguments: Seq[Expression] = Nil, - propagateNull: Boolean = true) extends Expression with NonSQLExpression { + propagateNull: Boolean = true) extends InvokeLike { override def nullable: Boolean = true override def children: Seq[Expression] = targetObject +: arguments @@ -131,8 +199,8 @@ case class Invoke( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) val obj = targetObject.genCode(ctx) - val argGen = arguments.map(_.genCode(ctx)) - val argString = argGen.map(_.value).mkString(", ") + + val (argCode, argString, resultIsNull) = prepareArguments(ctx) val returnPrimitive = method.isDefined && method.get.getReturnType.isPrimitive val needTryCatch = method.isDefined && method.get.getExceptionTypes.nonEmpty @@ -164,12 +232,6 @@ case class Invoke( """ } - val setIsNull = if (propagateNull && arguments.nonEmpty) { - s"boolean ${ev.isNull} = ${obj.isNull} || ${argGen.map(_.isNull).mkString(" || ")};" - } else { - s"boolean ${ev.isNull} = ${obj.isNull};" - } - // If the function can return null, we do an extra check to make sure our null bit is still set // correctly. val postNullCheck = if (ctx.defaultValue(dataType) == "null") { @@ -177,15 +239,19 @@ case class Invoke( } else { "" } + val code = s""" ${obj.code} - ${argGen.map(_.code).mkString("\n")} - $setIsNull + boolean ${ev.isNull} = true; $javaType ${ev.value} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { - $evaluate + if (!${obj.isNull}) { + $argCode + ${ev.isNull} = $resultIsNull; + if (!${ev.isNull}) { + $evaluate + } + $postNullCheck } - $postNullCheck """ ev.copy(code = code) } @@ -223,10 +289,10 @@ case class NewInstance( arguments: Seq[Expression], propagateNull: Boolean, dataType: DataType, - outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { + outerPointer: Option[() => AnyRef]) extends InvokeLike { private val className = cls.getName - override def nullable: Boolean = propagateNull + override def nullable: Boolean = needNullCheck override def children: Seq[Expression] = arguments @@ -245,52 +311,25 @@ case class NewInstance( override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = ctx.javaType(dataType) - val argIsNulls = ctx.freshName("argIsNulls") - ctx.addMutableState("boolean[]", argIsNulls, - s"$argIsNulls = new boolean[${arguments.size}];") - val argValues = arguments.zipWithIndex.map { case (e, i) => - val argValue = ctx.freshName("argValue") - ctx.addMutableState(ctx.javaType(e.dataType), argValue, "") - argValue - } - val argCodes = arguments.zipWithIndex.map { case (e, i) => - val expr = e.genCode(ctx) - expr.code + s""" - $argIsNulls[$i] = ${expr.isNull}; - ${argValues(i)} = ${expr.value}; - """ - } - val argCode = ctx.splitExpressions(ctx.INPUT_ROW, argCodes) + val (argCode, argString, resultIsNull) = prepareArguments(ctx) val outer = outerPointer.map(func => Literal.fromObject(func()).genCode(ctx)) - var isNull = ev.isNull - val setIsNull 
= if (propagateNull && arguments.nonEmpty) { - s""" - boolean $isNull = false; - for (int idx = 0; idx < ${arguments.length}; idx++) { - if ($argIsNulls[idx]) { $isNull = true; break; } - } - """ - } else { - isNull = "false" - "" - } + ev.isNull = resultIsNull val constructorCall = outer.map { gen => - s"""${gen.value}.new ${cls.getSimpleName}(${argValues.mkString(", ")})""" + s"${gen.value}.new ${cls.getSimpleName}($argString)" }.getOrElse { - s"new $className(${argValues.mkString(", ")})" + s"new $className($argString)" } val code = s""" $argCode ${outer.map(_.code).getOrElse("")} - $setIsNull - final $javaType ${ev.value} = $isNull ? ${ctx.defaultValue(javaType)} : $constructorCall; - """ - ev.copy(code = code, isNull = isNull) + final $javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(javaType)} : $constructorCall; + """ + ev.copy(code = code) } override def toString: String = s"newInstance($cls)" From 31002e4a77ca56492f41bf35e7c8f263d767d3aa Mon Sep 17 00:00:00 2001 From: sethah Date: Mon, 21 Nov 2016 05:36:49 -0800 Subject: [PATCH 0162/1204] [SPARK-18282][ML][PYSPARK] Add python clustering summaries for GMM and BKM ## What changes were proposed in this pull request? Add model summary APIs for `GaussianMixtureModel` and `BisectingKMeansModel` in pyspark. ## How was this patch tested? Unit tests. Author: sethah Closes #15777 from sethah/pyspark_cluster_summaries. (cherry picked from commit e811fbf9ed131bccbc46f3c5701c4ff317222fd9) Signed-off-by: Yanbo Liang --- .../classification/LogisticRegression.scala | 11 +- .../spark/ml/clustering/BisectingKMeans.scala | 9 +- .../spark/ml/clustering/GaussianMixture.scala | 9 +- .../apache/spark/ml/clustering/KMeans.scala | 9 +- .../GeneralizedLinearRegression.scala | 11 +- .../ml/regression/LinearRegression.scala | 14 +- .../LogisticRegressionSuite.scala | 2 + .../ml/clustering/BisectingKMeansSuite.scala | 3 + .../ml/clustering/GaussianMixtureSuite.scala | 3 + .../spark/ml/clustering/KMeansSuite.scala | 3 + .../GeneralizedLinearRegressionSuite.scala | 2 + .../ml/regression/LinearRegressionSuite.scala | 2 + python/pyspark/ml/classification.py | 15 +- python/pyspark/ml/clustering.py | 162 +++++++++++++++++- python/pyspark/ml/regression.py | 16 +- python/pyspark/ml/tests.py | 32 ++++ 16 files changed, 256 insertions(+), 47 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index f58efd36a1c66..d07b4adebb08f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -648,7 +648,7 @@ class LogisticRegression @Since("1.2.0") ( $(labelCol), $(featuresCol), objectiveHistory) - model.setSummary(logRegSummary) + model.setSummary(Some(logRegSummary)) } else { model } @@ -790,9 +790,9 @@ class LogisticRegressionModel private[spark] ( } } - private[classification] def setSummary( - summary: LogisticRegressionTrainingSummary): this.type = { - this.trainingSummary = Some(summary) + private[classification] + def setSummary(summary: Option[LogisticRegressionTrainingSummary]): this.type = { + this.trainingSummary = summary this } @@ -887,8 +887,7 @@ class LogisticRegressionModel private[spark] ( override def copy(extra: ParamMap): LogisticRegressionModel = { val newModel = copyValues(new LogisticRegressionModel(uid, coefficientMatrix, interceptVector, numClasses, isMultinomial), extra) - if 
(trainingSummary.isDefined) newModel.setSummary(trainingSummary.get) - newModel.setParent(parent) + newModel.setSummary(trainingSummary).setParent(parent) } override protected def raw2prediction(rawPrediction: Vector): Double = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index f8a606d60b2aa..e6ca3aedffd9d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -95,8 +95,7 @@ class BisectingKMeansModel private[ml] ( @Since("2.0.0") override def copy(extra: ParamMap): BisectingKMeansModel = { val copied = copyValues(new BisectingKMeansModel(uid, parentModel), extra) - if (trainingSummary.isDefined) copied.setSummary(trainingSummary.get) - copied.setParent(this.parent) + copied.setSummary(trainingSummary).setParent(this.parent) } @Since("2.0.0") @@ -132,8 +131,8 @@ class BisectingKMeansModel private[ml] ( private var trainingSummary: Option[BisectingKMeansSummary] = None - private[clustering] def setSummary(summary: BisectingKMeansSummary): this.type = { - this.trainingSummary = Some(summary) + private[clustering] def setSummary(summary: Option[BisectingKMeansSummary]): this.type = { + this.trainingSummary = summary this } @@ -265,7 +264,7 @@ class BisectingKMeans @Since("2.0.0") ( val model = copyValues(new BisectingKMeansModel(uid, parentModel).setParent(this)) val summary = new BisectingKMeansSummary( model.transform(dataset), $(predictionCol), $(featuresCol), $(k)) - model.setSummary(summary) + model.setSummary(Some(summary)) instr.logSuccess(model) model } diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index c6035cc4c9647..92d0b7d085f12 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -90,8 +90,7 @@ class GaussianMixtureModel private[ml] ( @Since("2.0.0") override def copy(extra: ParamMap): GaussianMixtureModel = { val copied = copyValues(new GaussianMixtureModel(uid, weights, gaussians), extra) - if (trainingSummary.isDefined) copied.setSummary(trainingSummary.get) - copied.setParent(this.parent) + copied.setSummary(trainingSummary).setParent(this.parent) } @Since("2.0.0") @@ -150,8 +149,8 @@ class GaussianMixtureModel private[ml] ( private var trainingSummary: Option[GaussianMixtureSummary] = None - private[clustering] def setSummary(summary: GaussianMixtureSummary): this.type = { - this.trainingSummary = Some(summary) + private[clustering] def setSummary(summary: Option[GaussianMixtureSummary]): this.type = { + this.trainingSummary = summary this } @@ -340,7 +339,7 @@ class GaussianMixture @Since("2.0.0") ( .setParent(this) val summary = new GaussianMixtureSummary(model.transform(dataset), $(predictionCol), $(probabilityCol), $(featuresCol), $(k)) - model.setSummary(summary) + model.setSummary(Some(summary)) instr.logNumFeatures(model.gaussians.head.mean.size) instr.logSuccess(model) model diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 26505b4cc1501..152bd13b7a17a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -110,8 
+110,7 @@ class KMeansModel private[ml] ( @Since("1.5.0") override def copy(extra: ParamMap): KMeansModel = { val copied = copyValues(new KMeansModel(uid, parentModel), extra) - if (trainingSummary.isDefined) copied.setSummary(trainingSummary.get) - copied.setParent(this.parent) + copied.setSummary(trainingSummary).setParent(this.parent) } /** @group setParam */ @@ -165,8 +164,8 @@ class KMeansModel private[ml] ( private var trainingSummary: Option[KMeansSummary] = None - private[clustering] def setSummary(summary: KMeansSummary): this.type = { - this.trainingSummary = Some(summary) + private[clustering] def setSummary(summary: Option[KMeansSummary]): this.type = { + this.trainingSummary = summary this } @@ -325,7 +324,7 @@ class KMeans @Since("1.5.0") ( val model = copyValues(new KMeansModel(uid, parentModel).setParent(this)) val summary = new KMeansSummary( model.transform(dataset), $(predictionCol), $(featuresCol), $(k)) - model.setSummary(summary) + model.setSummary(Some(summary)) instr.logSuccess(model) model } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 736fd3b9e0f64..3f9de1fe74c9c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -270,7 +270,7 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val .setParent(this)) val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model, wlsModel.diagInvAtWA.toArray, 1, getSolver) - return model.setSummary(trainingSummary) + return model.setSummary(Some(trainingSummary)) } // Fit Generalized Linear Model by iteratively reweighted least squares (IRLS). 
@@ -284,7 +284,7 @@ class GeneralizedLinearRegression @Since("2.0.0") (@Since("2.0.0") override val .setParent(this)) val trainingSummary = new GeneralizedLinearRegressionTrainingSummary(dataset, model, irlsModel.diagInvAtWA.toArray, irlsModel.numIterations, getSolver) - model.setSummary(trainingSummary) + model.setSummary(Some(trainingSummary)) } @Since("2.0.0") @@ -761,8 +761,8 @@ class GeneralizedLinearRegressionModel private[ml] ( def hasSummary: Boolean = trainingSummary.nonEmpty private[regression] - def setSummary(summary: GeneralizedLinearRegressionTrainingSummary): this.type = { - this.trainingSummary = Some(summary) + def setSummary(summary: Option[GeneralizedLinearRegressionTrainingSummary]): this.type = { + this.trainingSummary = summary this } @@ -778,8 +778,7 @@ class GeneralizedLinearRegressionModel private[ml] ( override def copy(extra: ParamMap): GeneralizedLinearRegressionModel = { val copied = copyValues(new GeneralizedLinearRegressionModel(uid, coefficients, intercept), extra) - if (trainingSummary.isDefined) copied.setSummary(trainingSummary.get) - copied.setParent(parent) + copied.setSummary(trainingSummary).setParent(parent) } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index da7ce6b46f2ab..8ea5e1e6c453a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -225,7 +225,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String model.diagInvAtWA.toArray, model.objectiveHistory) - return lrModel.setSummary(trainingSummary) + return lrModel.setSummary(Some(trainingSummary)) } val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE @@ -278,7 +278,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String model, Array(0D), Array(0D)) - return model.setSummary(trainingSummary) + return model.setSummary(Some(trainingSummary)) } else { require($(regParam) == 0.0, "The standard deviation of the label is zero. 
" + "Model cannot be regularized.") @@ -400,7 +400,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String model, Array(0D), objectiveHistory) - model.setSummary(trainingSummary) + model.setSummary(Some(trainingSummary)) } @Since("1.4.0") @@ -446,8 +446,9 @@ class LinearRegressionModel private[ml] ( throw new SparkException("No training summary available for this LinearRegressionModel") } - private[regression] def setSummary(summary: LinearRegressionTrainingSummary): this.type = { - this.trainingSummary = Some(summary) + private[regression] + def setSummary(summary: Option[LinearRegressionTrainingSummary]): this.type = { + this.trainingSummary = summary this } @@ -490,8 +491,7 @@ class LinearRegressionModel private[ml] ( @Since("1.4.0") override def copy(extra: ParamMap): LinearRegressionModel = { val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra) - if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get) - newModel.setParent(parent) + newModel.setSummary(trainingSummary).setParent(parent) } /** diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 2877285eb4d59..e360542eae2ab 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -147,6 +147,8 @@ class LogisticRegressionSuite assert(model.hasSummary) val copiedModel = model.copy(ParamMap.empty) assert(copiedModel.hasSummary) + model.setSummary(None) + assert(!model.hasSummary) } test("empty probabilityCol") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala index 49797d938d751..fc491cd6161fd 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/BisectingKMeansSuite.scala @@ -109,6 +109,9 @@ class BisectingKMeansSuite assert(clusterSizes.length === k) assert(clusterSizes.sum === numRows) assert(clusterSizes.forall(_ >= 0)) + + model.setSummary(None) + assert(!model.hasSummary) } test("read/write") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala index 7165b63ed3b96..07299123f8a47 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala @@ -111,6 +111,9 @@ class GaussianMixtureSuite extends SparkFunSuite with MLlibTestSparkContext assert(clusterSizes.length === k) assert(clusterSizes.sum === numRows) assert(clusterSizes.forall(_ >= 0)) + + model.setSummary(None) + assert(!model.hasSummary) } test("read/write") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala index 73972557d2631..c1b7242e11a8f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala @@ -123,6 +123,9 @@ class KMeansSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultR assert(clusterSizes.length === k) assert(clusterSizes.sum === numRows) 
assert(clusterSizes.forall(_ >= 0)) + + model.setSummary(None) + assert(!model.hasSummary) } test("KMeansModel transform with non-default feature and prediction cols") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 6a4ac1735b2cb..9b0fa67630d2e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -197,6 +197,8 @@ class GeneralizedLinearRegressionSuite assert(model.hasSummary) val copiedModel = model.copy(ParamMap.empty) assert(copiedModel.hasSummary) + model.setSummary(None) + assert(!model.hasSummary) assert(model.getFeaturesCol === "features") assert(model.getPredictionCol === "prediction") diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index df97d0b2ae7ad..0be82742a33be 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -146,6 +146,8 @@ class LinearRegressionSuite assert(model.hasSummary) val copiedModel = model.copy(ParamMap.empty) assert(copiedModel.hasSummary) + model.setSummary(None) + assert(!model.hasSummary) model.transform(datasetWithDenseFeature) .select("label", "prediction") diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 56c8c62259e79..83e1e89347660 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -309,13 +309,16 @@ def interceptVector(self): @since("2.0.0") def summary(self): """ - Gets summary (e.g. residuals, mse, r-squared ) of model on - training set. An exception is thrown if - `trainingSummary is None`. + Gets summary (e.g. accuracy/precision/recall, objective history, total iterations) of model + trained on the training set. An exception is thrown if `trainingSummary is None`. 
""" - java_blrt_summary = self._call_java("summary") - # Note: Once multiclass is added, update this to return correct summary - return BinaryLogisticRegressionTrainingSummary(java_blrt_summary) + if self.hasSummary: + java_blrt_summary = self._call_java("summary") + # Note: Once multiclass is added, update this to return correct summary + return BinaryLogisticRegressionTrainingSummary(java_blrt_summary) + else: + raise RuntimeError("No training summary available for this %s" % + self.__class__.__name__) @property @since("2.0.0") diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 7632f05c3b68c..e58ec1e7ac296 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -17,16 +17,74 @@ from pyspark import since, keyword_only from pyspark.ml.util import * -from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaWrapper from pyspark.ml.param.shared import * from pyspark.ml.common import inherit_doc -__all__ = ['BisectingKMeans', 'BisectingKMeansModel', +__all__ = ['BisectingKMeans', 'BisectingKMeansModel', 'BisectingKMeansSummary', 'KMeans', 'KMeansModel', - 'GaussianMixture', 'GaussianMixtureModel', + 'GaussianMixture', 'GaussianMixtureModel', 'GaussianMixtureSummary', 'LDA', 'LDAModel', 'LocalLDAModel', 'DistributedLDAModel'] +class ClusteringSummary(JavaWrapper): + """ + .. note:: Experimental + + Clustering results for a given model. + + .. versionadded:: 2.1.0 + """ + + @property + @since("2.1.0") + def predictionCol(self): + """ + Name for column of predicted clusters in `predictions`. + """ + return self._call_java("predictionCol") + + @property + @since("2.1.0") + def predictions(self): + """ + DataFrame produced by the model's `transform` method. + """ + return self._call_java("predictions") + + @property + @since("2.1.0") + def featuresCol(self): + """ + Name for column of features in `predictions`. + """ + return self._call_java("featuresCol") + + @property + @since("2.1.0") + def k(self): + """ + The number of clusters the model was trained with. + """ + return self._call_java("k") + + @property + @since("2.1.0") + def cluster(self): + """ + DataFrame of predicted cluster centers for each training data point. + """ + return self._call_java("cluster") + + @property + @since("2.1.0") + def clusterSizes(self): + """ + Size of (number of data points in) each cluster. + """ + return self._call_java("clusterSizes") + + class GaussianMixtureModel(JavaModel, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental @@ -56,6 +114,28 @@ def gaussiansDF(self): """ return self._call_java("gaussiansDF") + @property + @since("2.1.0") + def hasSummary(self): + """ + Indicates whether a training summary exists for this model + instance. + """ + return self._call_java("hasSummary") + + @property + @since("2.1.0") + def summary(self): + """ + Gets summary (e.g. cluster assignments, cluster sizes) of the model trained on the + training set. An exception is thrown if no summary exists. + """ + if self.hasSummary: + return GaussianMixtureSummary(self._call_java("summary")) + else: + raise RuntimeError("No training summary available for this %s" % + self.__class__.__name__) + @inherit_doc class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol, HasSeed, @@ -92,6 +172,13 @@ class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte >>> gm = GaussianMixture(k=3, tol=0.0001, ... 
maxIter=10, seed=10) >>> model = gm.fit(df) + >>> model.hasSummary + True + >>> summary = model.summary + >>> summary.k + 3 + >>> summary.clusterSizes + [2, 2, 2] >>> weights = model.weights >>> len(weights) 3 @@ -118,6 +205,8 @@ class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte >>> model_path = temp_path + "/gmm_model" >>> model.save(model_path) >>> model2 = GaussianMixtureModel.load(model_path) + >>> model2.hasSummary + False >>> model2.weights == model.weights True >>> model2.gaussiansDF.show() @@ -181,6 +270,32 @@ def getK(self): return self.getOrDefault(self.k) +class GaussianMixtureSummary(ClusteringSummary): + """ + .. note:: Experimental + + Gaussian mixture clustering results for a given model. + + .. versionadded:: 2.1.0 + """ + + @property + @since("2.1.0") + def probabilityCol(self): + """ + Name for column of predicted probability of each cluster in `predictions`. + """ + return self._call_java("probabilityCol") + + @property + @since("2.1.0") + def probability(self): + """ + DataFrame of probabilities of each cluster for each training data point. + """ + return self._call_java("probability") + + class KMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): """ Model fitted by KMeans. @@ -346,6 +461,27 @@ def computeCost(self, dataset): """ return self._call_java("computeCost", dataset) + @property + @since("2.1.0") + def hasSummary(self): + """ + Indicates whether a training summary exists for this model instance. + """ + return self._call_java("hasSummary") + + @property + @since("2.1.0") + def summary(self): + """ + Gets summary (e.g. cluster assignments, cluster sizes) of the model trained on the + training set. An exception is thrown if no summary exists. + """ + if self.hasSummary: + return BisectingKMeansSummary(self._call_java("summary")) + else: + raise RuntimeError("No training summary available for this %s" % + self.__class__.__name__) + @inherit_doc class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasSeed, @@ -373,6 +509,13 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte 2 >>> model.computeCost(df) 2.000... + >>> model.hasSummary + True + >>> summary = model.summary + >>> summary.k + 2 + >>> summary.clusterSizes + [2, 2] >>> transformed = model.transform(df).select("features", "prediction") >>> rows = transformed.collect() >>> rows[0].prediction == rows[1].prediction @@ -387,6 +530,8 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte >>> model_path = temp_path + "/bkm_model" >>> model.save(model_path) >>> model2 = BisectingKMeansModel.load(model_path) + >>> model2.hasSummary + False >>> model.clusterCenters()[0] == model2.clusterCenters()[0] array([ True, True], dtype=bool) >>> model.clusterCenters()[1] == model2.clusterCenters()[1] @@ -460,6 +605,17 @@ def _create_model(self, java_model): return BisectingKMeansModel(java_model) +class BisectingKMeansSummary(ClusteringSummary): + """ + .. note:: Experimental + + Bisecting KMeans clustering results for a given model. + + .. versionadded:: 2.1.0 + """ + pass + + @inherit_doc class LDAModel(JavaModel): """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 0bc319ca4d601..385391ba53fd4 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -160,8 +160,12 @@ def summary(self): training set. An exception is thrown if `trainingSummary is None`. 
""" - java_lrt_summary = self._call_java("summary") - return LinearRegressionTrainingSummary(java_lrt_summary) + if self.hasSummary: + java_lrt_summary = self._call_java("summary") + return LinearRegressionTrainingSummary(java_lrt_summary) + else: + raise RuntimeError("No training summary available for this %s" % + self.__class__.__name__) @property @since("2.0.0") @@ -1459,8 +1463,12 @@ def summary(self): training set. An exception is thrown if `trainingSummary is None`. """ - java_glrt_summary = self._call_java("summary") - return GeneralizedLinearRegressionTrainingSummary(java_glrt_summary) + if self.hasSummary: + java_glrt_summary = self._call_java("summary") + return GeneralizedLinearRegressionTrainingSummary(java_glrt_summary) + else: + raise RuntimeError("No training summary available for this %s" % + self.__class__.__name__) @property @since("2.0.0") diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 9d46cc3b4ae64..c0f0d4073564e 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1097,6 +1097,38 @@ def test_logistic_regression_summary(self): sameSummary = model.evaluate(df) self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC) + def test_gaussian_mixture_summary(self): + data = [(Vectors.dense(1.0),), (Vectors.dense(5.0),), (Vectors.dense(10.0),), + (Vectors.sparse(1, [], []),)] + df = self.spark.createDataFrame(data, ["features"]) + gmm = GaussianMixture(k=2) + model = gmm.fit(df) + self.assertTrue(model.hasSummary) + s = model.summary + self.assertTrue(isinstance(s.predictions, DataFrame)) + self.assertEqual(s.probabilityCol, "probability") + self.assertTrue(isinstance(s.probability, DataFrame)) + self.assertEqual(s.featuresCol, "features") + self.assertEqual(s.predictionCol, "prediction") + self.assertTrue(isinstance(s.cluster, DataFrame)) + self.assertEqual(len(s.clusterSizes), 2) + self.assertEqual(s.k, 2) + + def test_bisecting_kmeans_summary(self): + data = [(Vectors.dense(1.0),), (Vectors.dense(5.0),), (Vectors.dense(10.0),), + (Vectors.sparse(1, [], []),)] + df = self.spark.createDataFrame(data, ["features"]) + bkm = BisectingKMeans(k=2) + model = bkm.fit(df) + self.assertTrue(model.hasSummary) + s = model.summary + self.assertTrue(isinstance(s.predictions, DataFrame)) + self.assertEqual(s.featuresCol, "features") + self.assertEqual(s.predictionCol, "prediction") + self.assertTrue(isinstance(s.cluster, DataFrame)) + self.assertEqual(len(s.clusterSizes), 2) + self.assertEqual(s.k, 2) + class OneVsRestTests(SparkSessionTestCase): From 251a9927646f367ca2cf75a87e80ce1c061a8f27 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Mon, 21 Nov 2016 05:50:35 -0800 Subject: [PATCH 0163/1204] [SPARK-18398][SQL] Fix nullabilities of MapObjects and ExternalMapToCatalyst. ## What changes were proposed in this pull request? The nullabilities of `MapObject` can be made more strict by relying on `inputObject.nullable` and `lambdaFunction.nullable`. Also `ExternalMapToCatalyst.dataType` can be made more strict by relying on `valueConverter.nullable`. ## How was this patch tested? Existing tests. Author: Takuya UESHIN Closes #15840 from ueshin/issues/SPARK-18398. 
(cherry picked from commit 9f262ae163b6dca6526665b3ad12b3b2ea8fb873) Signed-off-by: Herman van Hovell --- .../spark/sql/catalyst/expressions/objects/objects.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 0b36091ece1bf..5c27179ec3b46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -461,14 +461,15 @@ case class MapObjects private( lambdaFunction: Expression, inputData: Expression) extends Expression with NonSQLExpression { - override def nullable: Boolean = true + override def nullable: Boolean = inputData.nullable override def children: Seq[Expression] = lambdaFunction :: inputData :: Nil override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") - override def dataType: DataType = ArrayType(lambdaFunction.dataType) + override def dataType: DataType = + ArrayType(lambdaFunction.dataType, containsNull = lambdaFunction.nullable) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val elementJavaType = ctx.javaType(loopVarDataType) @@ -642,7 +643,8 @@ case class ExternalMapToCatalyst private( override def foldable: Boolean = false - override def dataType: MapType = MapType(keyConverter.dataType, valueConverter.dataType) + override def dataType: MapType = MapType( + keyConverter.dataType, valueConverter.dataType, valueContainsNull = valueConverter.nullable) override def eval(input: InternalRow): Any = throw new UnsupportedOperationException("Only code-generated evaluation is supported") From b0a73c9be3b691f95d2f6ace3d6304db7f69705f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 21 Nov 2016 16:14:59 -0500 Subject: [PATCH 0164/1204] [SPARK-18517][SQL] DROP TABLE IF EXISTS should not warn for non-existing tables ## What changes were proposed in this pull request? Currently, `DROP TABLE IF EXISTS` shows warning for non-existing tables. However, it had better be quiet for this case by definition of the command. **BEFORE** ```scala scala> sql("DROP TABLE IF EXISTS nonexist") 16/11/20 20:48:26 WARN DropTableCommand: org.apache.spark.sql.catalyst.analysis.NoSuchTableException: Table or view 'nonexist' not found in database 'default'; ``` **AFTER** ```scala scala> sql("DROP TABLE IF EXISTS nonexist") res0: org.apache.spark.sql.DataFrame = [] ``` ## How was this patch tested? Manual because this is related to the warning messages instead of exceptions. Author: Dongjoon Hyun Closes #15953 from dongjoon-hyun/SPARK-18517. 
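
For context, the quiet `IF EXISTS` behavior is what makes idempotent setup scripts clean: dropping a possibly missing table should neither fail nor pollute the logs. A minimal usage sketch follows; the session settings, table name, and `USING parquet` data source are illustrative choices, not part of this patch.

```scala
import org.apache.spark.sql.SparkSession

object IdempotentSetup extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("drop-if-exists")
    .getOrCreate()

  // Safe to run repeatedly: no exception, and with this change no spurious
  // DropTableCommand warning when the table does not exist yet.
  spark.sql("DROP TABLE IF EXISTS events")
  spark.sql("CREATE TABLE events (id INT, name STRING) USING parquet")

  spark.stop()
}
```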
(cherry picked from commit ddd02f50bb7458410d65427321efc75da5e65224) Signed-off-by: Andrew Or --- .../scala/org/apache/spark/sql/execution/command/ddl.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 570a9967871e9..0f126d0200eff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, JobConf} import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.Resolver +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, Resolver} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} @@ -202,6 +202,7 @@ case class DropTableCommand( sparkSession.sharedState.cacheManager.uncacheQuery( sparkSession.table(tableName.quotedString)) } catch { + case _: NoSuchTableException if ifExists => case NonFatal(e) => log.warn(e.toString, e) } catalog.refreshTable(tableName) From 406f33987ac078fb20d2f5e81b7e1f646ea53fed Mon Sep 17 00:00:00 2001 From: Gabriel Huang Date: Mon, 21 Nov 2016 16:08:34 -0500 Subject: [PATCH 0165/1204] [SPARK-18361][PYSPARK] Expose RDD localCheckpoint in PySpark ## What changes were proposed in this pull request? Expose RDD's localCheckpoint() and associated functions in PySpark. ## How was this patch tested? I added a UnitTest in python/pyspark/tests.py which passes. I certify that this is my original work, and I license it to the project under the project's open source license. Gabriel HUANG Developer at Cardabel (http://cardabel.com/) Author: Gabriel Huang Closes #15811 from gabrielhuang/pyspark-localcheckpoint. --- python/pyspark/rdd.py | 33 ++++++++++++++++++++++++++++++++- python/pyspark/tests.py | 17 +++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 641787ee20e0c..f21a364df9100 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -263,13 +263,44 @@ def checkpoint(self): def isCheckpointed(self): """ - Return whether this RDD has been checkpointed or not + Return whether this RDD is checkpointed and materialized, either reliably or locally. """ return self._jrdd.rdd().isCheckpointed() + def localCheckpoint(self): + """ + Mark this RDD for local checkpointing using Spark's existing caching layer. + + This method is for users who wish to truncate RDD lineages while skipping the expensive + step of replicating the materialized data in a reliable distributed file system. This is + useful for RDDs with long lineages that need to be truncated periodically (e.g. GraphX). + + Local checkpointing sacrifices fault-tolerance for performance. In particular, checkpointed + data is written to ephemeral local storage in the executors instead of to a reliable, + fault-tolerant storage. The effect is that if an executor fails during the computation, + the checkpointed data may no longer be accessible, causing an irrecoverable job failure. + + This is NOT safe to use with dynamic allocation, which removes executors along + with their cached blocks. 
If you must use both features, you are advised to set + L{spark.dynamicAllocation.cachedExecutorIdleTimeout} to a high value. + + The checkpoint directory set through L{SparkContext.setCheckpointDir()} is not used. + """ + self._jrdd.rdd().localCheckpoint() + + def isLocallyCheckpointed(self): + """ + Return whether this RDD is marked for local checkpointing. + + Exposed for testing. + """ + return self._jrdd.rdd().isLocallyCheckpointed() + def getCheckpointFile(self): """ Gets the name of the file to which this RDD was checkpointed + + Not defined if RDD is checkpointed locally. """ checkpointFile = self._jrdd.rdd().getCheckpointFile() if checkpointFile.isDefined(): diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 3e0bd16d85ca4..ab4bef8329cd0 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -390,6 +390,23 @@ def test_checkpoint_and_restore(self): self.assertEqual([1, 2, 3, 4], recovered.collect()) +class LocalCheckpointTests(ReusedPySparkTestCase): + + def test_basic_localcheckpointing(self): + parCollection = self.sc.parallelize([1, 2, 3, 4]) + flatMappedRDD = parCollection.flatMap(lambda x: range(1, x + 1)) + + self.assertFalse(flatMappedRDD.isCheckpointed()) + self.assertFalse(flatMappedRDD.isLocallyCheckpointed()) + + flatMappedRDD.localCheckpoint() + result = flatMappedRDD.collect() + time.sleep(1) # 1 second + self.assertTrue(flatMappedRDD.isCheckpointed()) + self.assertTrue(flatMappedRDD.isLocallyCheckpointed()) + self.assertEqual(flatMappedRDD.collect(), result) + + class AddFileTests(PySparkTestCase): def test_add_py_file(self): From 2afc18be23150d283361d374caf8cbfd3da63c9c Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 21 Nov 2016 13:23:32 -0800 Subject: [PATCH 0166/1204] [SPARK-17765][SQL] Support for writing out user-defined type in ORC datasource ## What changes were proposed in this pull request? This PR adds the support for `UserDefinedType` when writing out instead of throwing `ClassCastException` in ORC data source. In more details, `OrcStruct` is being created based on string from`DataType.catalogString`. For user-defined type, it seems it returns `sqlType.simpleString` for `catalogString` by default[1]. However, during type-dispatching to match the output with the schema, it tries to cast to, for example, `StructType`[2]. So, running the codes below (`MyDenseVector` was borrowed[3]) : ``` scala val data = Seq((1, new UDT.MyDenseVector(Array(0.25, 2.25, 4.25)))) val udtDF = data.toDF("id", "vectors") udtDF.write.orc("/tmp/test.orc") ``` ends up throwing an exception as below: ``` java.lang.ClassCastException: org.apache.spark.sql.UDT$MyDenseVectorUDT cannot be cast to org.apache.spark.sql.types.ArrayType at org.apache.spark.sql.hive.HiveInspectors$class.wrapperFor(HiveInspectors.scala:381) at org.apache.spark.sql.hive.orc.OrcSerializer.wrapperFor(OrcFileFormat.scala:164) ... ``` So, this PR uses `UserDefinedType.sqlType` during finding the correct converter when writing out in ORC data source. [1]https://github.com/apache/spark/blob/dfdcab00c7b6200c22883baa3ebc5818be09556f/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala#L95 [2]https://github.com/apache/spark/blob/d2dc8c4a162834818190ffd82894522c524ca3e5/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala#L326 [3]https://github.com/apache/spark/blob/2bfed1a0c5be7d0718fd574a4dad90f4f6b44be7/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala#L38-L70 ## How was this patch tested? Unit tests in `OrcQuerySuite`. 
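
For reference, a round-trip sketch of the path this patch enables, using the public `ml.linalg.Vector` type (also backed by a `UserDefinedType`) in place of the test-only `MyDenseVector` from the snippet above. It assumes a Hive-enabled build, since the ORC data source lives in the Hive module in this version, and the output path is illustrative.

```scala
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

object OrcUdtRoundTrip extends App {
  // enableHiveSupport requires the spark-hive module on the classpath.
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("orc-udt")
    .enableHiveSupport()
    .getOrCreate()
  import spark.implicits._

  // Vector columns are serialized through a UserDefinedType, the case fixed here.
  val udtDF = Seq((1, Vectors.dense(0.25, 2.25, 4.25))).toDF("id", "vectors")

  val path = "/tmp/orc-udt-example"   // illustrative location
  udtDF.write.mode("overwrite").orc(path)

  // Read back with the original schema, as the new OrcQuerySuite test does.
  val readBack = spark.read.schema(udtDF.schema).orc(path)
  readBack.show(false)

  spark.stop()
}
```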
Author: hyukjinkwon Closes #15361 from HyukjinKwon/SPARK-17765. (cherry picked from commit a2d464770cd183daa7d727bf377bde9c21e29e6a) Signed-off-by: Reynold Xin --- .../org/apache/spark/sql/hive/HiveInspectors.scala | 3 +++ .../org/apache/spark/sql/hive/orc/OrcQuerySuite.scala | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index e303065127c3b..52aa1088acd4a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -246,6 +246,9 @@ private[hive] trait HiveInspectors { * Wraps with Hive types based on object inspector. */ protected def wrapperFor(oi: ObjectInspector, dataType: DataType): Any => Any = oi match { + case _ if dataType.isInstanceOf[UserDefinedType[_]] => + val sqlType = dataType.asInstanceOf[UserDefinedType[_]].sqlType + wrapperFor(oi, sqlType) case x: ConstantObjectInspector => (o: Any) => x.getWritableConstantValue diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala index a628977af2f4e..b8761e9de2886 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala @@ -93,6 +93,16 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { } } + test("Read/write UserDefinedType") { + withTempPath { path => + val data = Seq((1, new UDT.MyDenseVector(Array(0.25, 2.25, 4.25)))) + val udtDF = data.toDF("id", "vectors") + udtDF.write.orc(path.getAbsolutePath) + val readBack = spark.read.schema(udtDF.schema).orc(path.getAbsolutePath) + checkAnswer(udtDF, readBack) + } + } + test("Creating case class RDD table") { val data = (1 to 100).map(i => (i, s"val_$i")) sparkContext.parallelize(data).toDF().createOrReplaceTempView("t") From 6dbe44891458b497c1ad4df8d8358e326fb3f795 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Mon, 21 Nov 2016 17:24:02 -0800 Subject: [PATCH 0167/1204] [SPARK-18493] Add missing python APIs: withWatermark and checkpoint to dataframe ## What changes were proposed in this pull request? This PR adds two of the newly added methods of `Dataset`s to Python: `withWatermark` and `checkpoint` ## How was this patch tested? Doc tests Author: Burak Yavuz Closes #15921 from brkyvz/py-watermark. (cherry picked from commit 97a8239a625df455d2c439f3628a529d6d9413ca) Signed-off-by: Shixiong Zhu --- python/pyspark/sql/dataframe.py | 57 ++++++++++++++++++- .../scala/org/apache/spark/sql/Dataset.scala | 10 +++- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 38998900837cf..6fe622643291e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -322,6 +322,54 @@ def show(self, n=20, truncate=True): def __repr__(self): return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) + @since(2.1) + def checkpoint(self, eager=True): + """Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the + logical plan of this DataFrame, which is especially useful in iterative algorithms where the + plan may grow exponentially. It will be saved to files inside the checkpoint + directory set with L{SparkContext.setCheckpointDir()}. 
+ + :param eager: Whether to checkpoint this DataFrame immediately + + .. note:: Experimental + """ + jdf = self._jdf.checkpoint(eager) + return DataFrame(jdf, self.sql_ctx) + + @since(2.1) + def withWatermark(self, eventTime, delayThreshold): + """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point + in time before which we assume no more late data is going to arrive. + + Spark will use this watermark for several purposes: + - To know when a given time window aggregation can be finalized and thus can be emitted + when using output modes that do not allow updates. + + - To minimize the amount of state that we need to keep for on-going aggregations. + + The current watermark is computed by looking at the `MAX(eventTime)` seen across + all of the partitions in the query minus a user specified `delayThreshold`. Due to the cost + of coordinating this value across partitions, the actual watermark used is only guaranteed + to be at least `delayThreshold` behind the actual event time. In some cases we may still + process records that arrive more than `delayThreshold` late. + + :param eventTime: the name of the column that contains the event time of the row. + :param delayThreshold: the minimum delay to wait to data to arrive late, relative to the + latest record that has been processed in the form of an interval + (e.g. "1 minute" or "5 hours"). + + .. note:: Experimental + + >>> sdf.select('name', sdf.time.cast('timestamp')).withWatermark('time', '10 minutes') + DataFrame[name: string, time: timestamp] + """ + if not eventTime or type(eventTime) is not str: + raise TypeError("eventTime should be provided as a string") + if not delayThreshold or type(delayThreshold) is not str: + raise TypeError("delayThreshold should be provided as a string interval") + jdf = self._jdf.withWatermark(eventTime, delayThreshold) + return DataFrame(jdf, self.sql_ctx) + @since(1.3) def count(self): """Returns the number of rows in this :class:`DataFrame`. @@ -1626,6 +1674,7 @@ def _test(): from pyspark.context import SparkContext from pyspark.sql import Row, SQLContext, SparkSession import pyspark.sql.dataframe + from pyspark.sql.functions import from_unixtime globs = pyspark.sql.dataframe.__dict__.copy() sc = SparkContext('local[4]', 'PythonTest') globs['sc'] = sc @@ -1638,9 +1687,11 @@ def _test(): globs['df3'] = sc.parallelize([Row(name='Alice', age=2), Row(name='Bob', age=5)]).toDF() globs['df4'] = sc.parallelize([Row(name='Alice', age=10, height=80), - Row(name='Bob', age=5, height=None), - Row(name='Tom', age=None, height=None), - Row(name=None, age=None, height=None)]).toDF() + Row(name='Bob', age=5, height=None), + Row(name='Tom', age=None, height=None), + Row(name=None, age=None, height=None)]).toDF() + globs['sdf'] = sc.parallelize([Row(name='Tom', time=1479441846), + Row(name='Bob', time=1479442946)]).toDF() (failure_count, test_count) = doctest.testmod( pyspark.sql.dataframe, globs=globs, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 3c75a6a45ec86..7ba6ffce278cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -485,7 +485,10 @@ class Dataset[T] private[sql]( def isStreaming: Boolean = logicalPlan.isStreaming /** - * Returns a checkpointed version of this Dataset. + * Eagerly checkpoint a Dataset and return the new Dataset. 
Checkpointing can be used to truncate + * the logical plan of this Dataset, which is especially useful in iterative algorithms where the + * plan may grow exponentially. It will be saved to files inside the checkpoint + * directory set with `SparkContext#setCheckpointDir`. * * @group basic * @since 2.1.0 @@ -495,7 +498,10 @@ class Dataset[T] private[sql]( def checkpoint(): Dataset[T] = checkpoint(eager = true) /** - * Returns a checkpointed version of this Dataset. + * Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the + * logical plan of this Dataset, which is especially useful in iterative algorithms where the + * plan may grow exponentially. It will be saved to files inside the checkpoint + * directory set with `SparkContext#setCheckpointDir`. * * @group basic * @since 2.1.0 From aaa2a173a81868a92d61bcc9420961aaa7eaeb57 Mon Sep 17 00:00:00 2001 From: Liwei Lin Date: Mon, 21 Nov 2016 21:14:13 -0800 Subject: [PATCH 0168/1204] [SPARK-18425][STRUCTURED STREAMING][TESTS] Test `CompactibleFileStreamLog` directly ## What changes were proposed in this pull request? Right now we are testing the most of `CompactibleFileStreamLog` in `FileStreamSinkLogSuite` (because `FileStreamSinkLog` once was the only subclass of `CompactibleFileStreamLog`, but now it's not the case any more). Let's refactor the tests so that `CompactibleFileStreamLog` is directly tested, making future changes (like https://github.com/apache/spark/pull/15828, https://github.com/apache/spark/pull/15827) to `CompactibleFileStreamLog` much easier to test and much easier to review. ## How was this patch tested? the PR itself is about tests Author: Liwei Lin Closes #15870 from lw-lin/test-compact-1113. (cherry picked from commit ebeb0830a3a4837c7354a0eee667b9f5fad389c5) Signed-off-by: Shixiong Zhu --- .../CompactibleFileStreamLogSuite.scala | 216 +++++++++++++++++- .../streaming/FileStreamSinkLogSuite.scala | 68 ------ 2 files changed, 214 insertions(+), 70 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala index 2cd2157b293cb..e511fda57912c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLogSuite.scala @@ -17,12 +17,79 @@ package org.apache.spark.sql.execution.streaming -import org.apache.spark.SparkFunSuite +import java.io._ +import java.nio.charset.StandardCharsets._ -class CompactibleFileStreamLogSuite extends SparkFunSuite { +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.sql.execution.streaming.FakeFileSystem._ +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.test.SharedSQLContext + +class CompactibleFileStreamLogSuite extends SparkFunSuite with SharedSQLContext { + + /** To avoid caching of FS objects */ + override protected val sparkConf = + new SparkConf().set(s"spark.hadoop.fs.$scheme.impl.disable.cache", "true") import CompactibleFileStreamLog._ + /** -- testing of `object CompactibleFileStreamLog` begins -- */ + + test("getBatchIdFromFileName") { + assert(1234L === getBatchIdFromFileName("1234")) + assert(1234L === getBatchIdFromFileName("1234.compact")) + intercept[NumberFormatException] { + getBatchIdFromFileName("1234a") + } + } + + test("isCompactionBatch") { + assert(false === isCompactionBatch(0, 
compactInterval = 3)) + assert(false === isCompactionBatch(1, compactInterval = 3)) + assert(true === isCompactionBatch(2, compactInterval = 3)) + assert(false === isCompactionBatch(3, compactInterval = 3)) + assert(false === isCompactionBatch(4, compactInterval = 3)) + assert(true === isCompactionBatch(5, compactInterval = 3)) + } + + test("nextCompactionBatchId") { + assert(2 === nextCompactionBatchId(0, compactInterval = 3)) + assert(2 === nextCompactionBatchId(1, compactInterval = 3)) + assert(5 === nextCompactionBatchId(2, compactInterval = 3)) + assert(5 === nextCompactionBatchId(3, compactInterval = 3)) + assert(5 === nextCompactionBatchId(4, compactInterval = 3)) + assert(8 === nextCompactionBatchId(5, compactInterval = 3)) + } + + test("getValidBatchesBeforeCompactionBatch") { + intercept[AssertionError] { + getValidBatchesBeforeCompactionBatch(0, compactInterval = 3) + } + intercept[AssertionError] { + getValidBatchesBeforeCompactionBatch(1, compactInterval = 3) + } + assert(Seq(0, 1) === getValidBatchesBeforeCompactionBatch(2, compactInterval = 3)) + intercept[AssertionError] { + getValidBatchesBeforeCompactionBatch(3, compactInterval = 3) + } + intercept[AssertionError] { + getValidBatchesBeforeCompactionBatch(4, compactInterval = 3) + } + assert(Seq(2, 3, 4) === getValidBatchesBeforeCompactionBatch(5, compactInterval = 3)) + } + + test("getAllValidBatches") { + assert(Seq(0) === getAllValidBatches(0, compactInterval = 3)) + assert(Seq(0, 1) === getAllValidBatches(1, compactInterval = 3)) + assert(Seq(2) === getAllValidBatches(2, compactInterval = 3)) + assert(Seq(2, 3) === getAllValidBatches(3, compactInterval = 3)) + assert(Seq(2, 3, 4) === getAllValidBatches(4, compactInterval = 3)) + assert(Seq(5) === getAllValidBatches(5, compactInterval = 3)) + assert(Seq(5, 6) === getAllValidBatches(6, compactInterval = 3)) + assert(Seq(5, 6, 7) === getAllValidBatches(7, compactInterval = 3)) + assert(Seq(8) === getAllValidBatches(8, compactInterval = 3)) + } + test("deriveCompactInterval") { // latestCompactBatchId(4) + 1 <= default(5) // then use latestestCompactBatchId + 1 === 5 @@ -30,4 +97,149 @@ class CompactibleFileStreamLogSuite extends SparkFunSuite { // First divisor of 10 greater than 4 === 5 assert(5 === deriveCompactInterval(4, 9)) } + + /** -- testing of `object CompactibleFileStreamLog` ends -- */ + + test("batchIdToPath") { + withFakeCompactibleFileStreamLog( + fileCleanupDelayMs = Long.MaxValue, + defaultCompactInterval = 3, + compactibleLog => { + assert("0" === compactibleLog.batchIdToPath(0).getName) + assert("1" === compactibleLog.batchIdToPath(1).getName) + assert("2.compact" === compactibleLog.batchIdToPath(2).getName) + assert("3" === compactibleLog.batchIdToPath(3).getName) + assert("4" === compactibleLog.batchIdToPath(4).getName) + assert("5.compact" === compactibleLog.batchIdToPath(5).getName) + }) + } + + test("serialize") { + withFakeCompactibleFileStreamLog( + fileCleanupDelayMs = Long.MaxValue, + defaultCompactInterval = 3, + compactibleLog => { + val logs = Array("entry_1", "entry_2", "entry_3") + val expected = s"""${FakeCompactibleFileStreamLog.VERSION} + |"entry_1" + |"entry_2" + |"entry_3"""".stripMargin + val baos = new ByteArrayOutputStream() + compactibleLog.serialize(logs, baos) + assert(expected === baos.toString(UTF_8.name())) + + baos.reset() + compactibleLog.serialize(Array(), baos) + assert(FakeCompactibleFileStreamLog.VERSION === baos.toString(UTF_8.name())) + }) + } + + test("deserialize") { + withFakeCompactibleFileStreamLog( + 
fileCleanupDelayMs = Long.MaxValue, + defaultCompactInterval = 3, + compactibleLog => { + val logs = s"""${FakeCompactibleFileStreamLog.VERSION} + |"entry_1" + |"entry_2" + |"entry_3"""".stripMargin + val expected = Array("entry_1", "entry_2", "entry_3") + assert(expected === + compactibleLog.deserialize(new ByteArrayInputStream(logs.getBytes(UTF_8)))) + + assert(Nil === + compactibleLog.deserialize( + new ByteArrayInputStream(FakeCompactibleFileStreamLog.VERSION.getBytes(UTF_8)))) + }) + } + + testWithUninterruptibleThread("compact") { + withFakeCompactibleFileStreamLog( + fileCleanupDelayMs = Long.MaxValue, + defaultCompactInterval = 3, + compactibleLog => { + for (batchId <- 0 to 10) { + compactibleLog.add(batchId, Array("some_path_" + batchId)) + val expectedFiles = (0 to batchId).map { id => "some_path_" + id } + assert(compactibleLog.allFiles() === expectedFiles) + if (isCompactionBatch(batchId, 3)) { + // Since batchId is a compaction batch, the batch log file should contain all logs + assert(compactibleLog.get(batchId).getOrElse(Nil) === expectedFiles) + } + } + }) + } + + testWithUninterruptibleThread("delete expired file") { + // Set `fileCleanupDelayMs` to 0 so that we can detect the deleting behaviour deterministically + withFakeCompactibleFileStreamLog( + fileCleanupDelayMs = 0, + defaultCompactInterval = 3, + compactibleLog => { + val fs = compactibleLog.metadataPath.getFileSystem(spark.sessionState.newHadoopConf()) + + def listBatchFiles(): Set[String] = { + fs.listStatus(compactibleLog.metadataPath).map(_.getPath.getName).filter { fileName => + try { + getBatchIdFromFileName(fileName) + true + } catch { + case _: NumberFormatException => false + } + }.toSet + } + + compactibleLog.add(0, Array("some_path_0")) + assert(Set("0") === listBatchFiles()) + compactibleLog.add(1, Array("some_path_1")) + assert(Set("0", "1") === listBatchFiles()) + compactibleLog.add(2, Array("some_path_2")) + assert(Set("2.compact") === listBatchFiles()) + compactibleLog.add(3, Array("some_path_3")) + assert(Set("2.compact", "3") === listBatchFiles()) + compactibleLog.add(4, Array("some_path_4")) + assert(Set("2.compact", "3", "4") === listBatchFiles()) + compactibleLog.add(5, Array("some_path_5")) + assert(Set("5.compact") === listBatchFiles()) + }) + } + + private def withFakeCompactibleFileStreamLog( + fileCleanupDelayMs: Long, + defaultCompactInterval: Int, + f: FakeCompactibleFileStreamLog => Unit + ): Unit = { + withTempDir { file => + val compactibleLog = new FakeCompactibleFileStreamLog( + fileCleanupDelayMs, + defaultCompactInterval, + spark, + file.getCanonicalPath) + f(compactibleLog) + } + } +} + +object FakeCompactibleFileStreamLog { + val VERSION = "test_version" +} + +class FakeCompactibleFileStreamLog( + _fileCleanupDelayMs: Long, + _defaultCompactInterval: Int, + sparkSession: SparkSession, + path: String) + extends CompactibleFileStreamLog[String]( + FakeCompactibleFileStreamLog.VERSION, + sparkSession, + path + ) { + + override protected def fileCleanupDelayMs: Long = _fileCleanupDelayMs + + override protected def isDeletingExpiredLog: Boolean = true + + override protected def defaultCompactInterval: Int = _defaultCompactInterval + + override def compactLogs(logs: Seq[String]): Seq[String] = logs } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index e1bc674a28071..e046fee0c04d3 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -29,61 +29,6 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSQLContext { import CompactibleFileStreamLog._ import FileStreamSinkLog._ - test("getBatchIdFromFileName") { - assert(1234L === getBatchIdFromFileName("1234")) - assert(1234L === getBatchIdFromFileName("1234.compact")) - intercept[NumberFormatException] { - getBatchIdFromFileName("1234a") - } - } - - test("isCompactionBatch") { - assert(false === isCompactionBatch(0, compactInterval = 3)) - assert(false === isCompactionBatch(1, compactInterval = 3)) - assert(true === isCompactionBatch(2, compactInterval = 3)) - assert(false === isCompactionBatch(3, compactInterval = 3)) - assert(false === isCompactionBatch(4, compactInterval = 3)) - assert(true === isCompactionBatch(5, compactInterval = 3)) - } - - test("nextCompactionBatchId") { - assert(2 === nextCompactionBatchId(0, compactInterval = 3)) - assert(2 === nextCompactionBatchId(1, compactInterval = 3)) - assert(5 === nextCompactionBatchId(2, compactInterval = 3)) - assert(5 === nextCompactionBatchId(3, compactInterval = 3)) - assert(5 === nextCompactionBatchId(4, compactInterval = 3)) - assert(8 === nextCompactionBatchId(5, compactInterval = 3)) - } - - test("getValidBatchesBeforeCompactionBatch") { - intercept[AssertionError] { - getValidBatchesBeforeCompactionBatch(0, compactInterval = 3) - } - intercept[AssertionError] { - getValidBatchesBeforeCompactionBatch(1, compactInterval = 3) - } - assert(Seq(0, 1) === getValidBatchesBeforeCompactionBatch(2, compactInterval = 3)) - intercept[AssertionError] { - getValidBatchesBeforeCompactionBatch(3, compactInterval = 3) - } - intercept[AssertionError] { - getValidBatchesBeforeCompactionBatch(4, compactInterval = 3) - } - assert(Seq(2, 3, 4) === getValidBatchesBeforeCompactionBatch(5, compactInterval = 3)) - } - - test("getAllValidBatches") { - assert(Seq(0) === getAllValidBatches(0, compactInterval = 3)) - assert(Seq(0, 1) === getAllValidBatches(1, compactInterval = 3)) - assert(Seq(2) === getAllValidBatches(2, compactInterval = 3)) - assert(Seq(2, 3) === getAllValidBatches(3, compactInterval = 3)) - assert(Seq(2, 3, 4) === getAllValidBatches(4, compactInterval = 3)) - assert(Seq(5) === getAllValidBatches(5, compactInterval = 3)) - assert(Seq(5, 6) === getAllValidBatches(6, compactInterval = 3)) - assert(Seq(5, 6, 7) === getAllValidBatches(7, compactInterval = 3)) - assert(Seq(8) === getAllValidBatches(8, compactInterval = 3)) - } - test("compactLogs") { withFileStreamSinkLog { sinkLog => val logs = Seq( @@ -184,19 +129,6 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSQLContext { } } - test("batchIdToPath") { - withSQLConf(SQLConf.FILE_SINK_LOG_COMPACT_INTERVAL.key -> "3") { - withFileStreamSinkLog { sinkLog => - assert("0" === sinkLog.batchIdToPath(0).getName) - assert("1" === sinkLog.batchIdToPath(1).getName) - assert("2.compact" === sinkLog.batchIdToPath(2).getName) - assert("3" === sinkLog.batchIdToPath(3).getName) - assert("4" === sinkLog.batchIdToPath(4).getName) - assert("5.compact" === sinkLog.batchIdToPath(5).getName) - } - } - } - testWithUninterruptibleThread("compact") { withSQLConf(SQLConf.FILE_SINK_LOG_COMPACT_INTERVAL.key -> "3") { withFileStreamSinkLog { sinkLog => From c7021407597480bddf226ffa6d1d3f682408dfeb Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 22 Nov 2016 00:05:30 -0800 
Subject: [PATCH 0169/1204] [SPARK-18444][SPARKR] SparkR running in yarn-cluster mode should not download Spark package. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? When running SparkR job in yarn-cluster mode, it will download Spark package from apache website which is not necessary. ``` ./bin/spark-submit --master yarn-cluster ./examples/src/main/r/dataframe.R ``` The following is output: ``` Attaching package: ‘SparkR’ The following objects are masked from ‘package:stats’: cov, filter, lag, na.omit, predict, sd, var, window The following objects are masked from ‘package:base’: as.data.frame, colnames, colnames<-, drop, endsWith, intersect, rank, rbind, sample, startsWith, subset, summary, transform, union Spark not found in SPARK_HOME: Spark not found in the cache directory. Installation will start. MirrorUrl not provided. Looking for preferred site from apache website... ...... ``` There's no ```SPARK_HOME``` in yarn-cluster mode since the R process is in a remote host of the yarn cluster rather than in the client host. The JVM comes up first and the R process then connects to it. So in such cases we should never have to download Spark as Spark is already running. ## How was this patch tested? Offline test. Author: Yanbo Liang Closes #15888 from yanboliang/spark-18444. (cherry picked from commit acb97157796231fef74aba985825b05b607b9279) Signed-off-by: Yanbo Liang --- R/pkg/R/sparkR.R | 20 +++++++---- R/pkg/R/utils.R | 4 +++ R/pkg/inst/tests/testthat/test_sparkR.R | 46 +++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 6 deletions(-) create mode 100644 R/pkg/inst/tests/testthat/test_sparkR.R diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 6b4a2f2fdc85c..a7152b4313993 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -373,8 +373,13 @@ sparkR.session <- function( overrideEnvs(sparkConfigMap, paramMap) } + deployMode <- "" + if (exists("spark.submit.deployMode", envir = sparkConfigMap)) { + deployMode <- sparkConfigMap[["spark.submit.deployMode"]] + } + if (!exists(".sparkRjsc", envir = .sparkREnv)) { - retHome <- sparkCheckInstall(sparkHome, master) + retHome <- sparkCheckInstall(sparkHome, master, deployMode) if (!is.null(retHome)) sparkHome <- retHome sparkExecutorEnvMap <- new.env() sparkR.sparkContext(master, appName, sparkHome, sparkConfigMap, sparkExecutorEnvMap, @@ -550,24 +555,27 @@ processSparkPackages <- function(packages) { # # @param sparkHome directory to find Spark package. # @param master the Spark master URL, used to check local or remote mode. +# @param deployMode whether to deploy your driver on the worker nodes (cluster) +# or locally as an external client (client). # @return NULL if no need to update sparkHome, and new sparkHome otherwise. 
-sparkCheckInstall <- function(sparkHome, master) { +sparkCheckInstall <- function(sparkHome, master, deployMode) { if (!isSparkRShell()) { if (!is.na(file.info(sparkHome)$isdir)) { msg <- paste0("Spark package found in SPARK_HOME: ", sparkHome) message(msg) NULL } else { - if (!nzchar(master) || isMasterLocal(master)) { - msg <- paste0("Spark not found in SPARK_HOME: ", - sparkHome) + if (isMasterLocal(master)) { + msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome) message(msg) packageLocalDir <- install.spark() packageLocalDir - } else { + } else if (isClientMode(master) || deployMode == "client") { msg <- paste0("Spark not found in SPARK_HOME: ", sparkHome, "\n", installInstruction("remote")) stop(msg) + } else { + NULL } } } else { diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 20004549cc037..098c0e3e31e95 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -777,6 +777,10 @@ isMasterLocal <- function(master) { grepl("^local(\\[([0-9]+|\\*)\\])?$", master, perl = TRUE) } +isClientMode <- function(master) { + grepl("([a-z]+)-client$", master, perl = TRUE) +} + isSparkRShell <- function() { grepl(".*shell\\.R$", Sys.getenv("R_PROFILE_USER"), perl = TRUE) } diff --git a/R/pkg/inst/tests/testthat/test_sparkR.R b/R/pkg/inst/tests/testthat/test_sparkR.R new file mode 100644 index 0000000000000..f73fc6baeccef --- /dev/null +++ b/R/pkg/inst/tests/testthat/test_sparkR.R @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +context("functions in sparkR.R") + +test_that("sparkCheckInstall", { + # "local, yarn-client, mesos-client" mode, SPARK_HOME was set correctly, + # and the SparkR job was submitted by "spark-submit" + sparkHome <- paste0(tempdir(), "/", "sparkHome") + dir.create(sparkHome) + master <- "" + deployMode <- "" + expect_true(is.null(sparkCheckInstall(sparkHome, master, deployMode))) + unlink(sparkHome, recursive = TRUE) + + # "yarn-cluster, mesos-cluster" mode, SPARK_HOME was not set, + # and the SparkR job was submitted by "spark-submit" + sparkHome <- "" + master <- "" + deployMode <- "" + expect_true(is.null(sparkCheckInstall(sparkHome, master, deployMode))) + + # "yarn-client, mesos-client" mode, SPARK_HOME was not set + sparkHome <- "" + master <- "yarn-client" + deployMode <- "" + expect_error(sparkCheckInstall(sparkHome, master, deployMode)) + sparkHome <- "" + master <- "" + deployMode <- "client" + expect_error(sparkCheckInstall(sparkHome, master, deployMode)) +}) From 63aa01ffe06e49af032b57ba2eb28dfb8f14f779 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 22 Nov 2016 11:26:10 +0000 Subject: [PATCH 0170/1204] [SPARK-18514][DOCS] Fix the markdown for `Note:`/`NOTE:`/`Note that` across R API documentation ## What changes were proposed in this pull request? 
It seems in R, there are - `Note:` - `NOTE:` - `Note that` This PR proposes to fix those to `Note:` to be consistent. **Before** ![2016-11-21 11 30 07](https://cloud.githubusercontent.com/assets/6477701/20468848/2f27b0fa-afde-11e6-89e3-993701269dbe.png) **After** ![2016-11-21 11 29 44](https://cloud.githubusercontent.com/assets/6477701/20468851/39469664-afde-11e6-9929-ad80be7fc405.png) ## How was this patch tested? The notes were found via ```bash grep -r "NOTE: " . grep -r "Note that " . ``` And then fixed one by one comparing with API documentation. After that, manually tested via `sh create-docs.sh` under `./R`. Author: hyukjinkwon Closes #15952 from HyukjinKwon/SPARK-18514. (cherry picked from commit 4922f9cdcac8b7c10320ac1fb701997fffa45d46) Signed-off-by: Sean Owen --- R/pkg/R/DataFrame.R | 6 ++++-- R/pkg/R/functions.R | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 4e3d97bb3ad07..9a51d530f120a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2541,7 +2541,8 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame #' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL. -#' Note that this does not remove duplicate rows across the two SparkDataFrames. +#' +#' Note: This does not remove duplicate rows across the two SparkDataFrames. #' #' @param x A SparkDataFrame #' @param y A SparkDataFrame @@ -2584,7 +2585,8 @@ setMethod("unionAll", #' Union two or more SparkDataFrames #' #' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL. -#' Note that this does not remove duplicate rows across the two SparkDataFrames. +#' +#' Note: This does not remove duplicate rows across the two SparkDataFrames. #' #' @param x a SparkDataFrame. #' @param ... additional SparkDataFrame(s). diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index f8a9d3ce5d918..bf5c96373c632 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2296,7 +2296,7 @@ setMethod("n", signature(x = "Column"), #' A pattern could be for instance \preformatted{dd.MM.yyyy} and could return a string like '18.03.1993'. All #' pattern letters of \code{java.text.SimpleDateFormat} can be used. #' -#' NOTE: Use when ever possible specialized functions like \code{year}. These benefit from a +#' Note: Use when ever possible specialized functions like \code{year}. These benefit from a #' specialized implementation. #' #' @param y Column to compute on. @@ -2341,7 +2341,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"), #' Locate the position of the first occurrence of substr column in the given string. #' Returns null if either of the arguments are null. #' -#' NOTE: The position is not zero based, but 1 based index. Returns 0 if substr +#' Note: The position is not zero based, but 1 based index. Returns 0 if substr #' could not be found in str. #' #' @param y column to check @@ -2779,7 +2779,8 @@ setMethod("window", signature(x = "Column"), #' locate #' #' Locate the position of the first occurrence of substr. -#' NOTE: The position is not zero based, but 1 based index. Returns 0 if substr +#' +#' Note: The position is not zero based, but 1 based index. Returns 0 if substr #' could not be found in str. #' #' @param substr a character string to be matched. 
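
The 1-based indexing called out in the `locate`/`instr` notes above matches the Scala/SQL functions of the same names. A small illustration follows; the sample data and column name are arbitrary.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{instr, locate}

object LocateExample extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("locate-instr")
    .getOrCreate()
  import spark.implicits._

  val df = Seq("abcde").toDF("s")

  // Positions are 1-based; 0 means the substring was not found.
  // Expected values for this row: 3, 3, 0.
  df.select(instr($"s", "cd"), locate("cd", $"s"), instr($"s", "zz")).show()

  spark.stop()
}
```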
From 36cd10d19d95418cec4b789545afc798088be315 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 22 Nov 2016 11:40:18 +0000 Subject: [PATCH 0171/1204] [SPARK-18447][DOCS] Fix the markdown for `Note:`/`NOTE:`/`Note that` across Python API documentation ## What changes were proposed in this pull request? It seems in Python, there are - `Note:` - `NOTE:` - `Note that` - `.. note::` This PR proposes to fix those to `.. note::` to be consistent. **Before** 2016-11-21 1 18 49 2016-11-21 12 42 43 **After** 2016-11-21 1 18 42 2016-11-21 12 42 51 ## How was this patch tested? The notes were found via ```bash grep -r "Note: " . grep -r "NOTE: " . grep -r "Note that " . ``` And then fixed one by one comparing with API documentation. After that, manually tested via `make html` under `./python/docs`. Author: hyukjinkwon Closes #15947 from HyukjinKwon/SPARK-18447. (cherry picked from commit 933a6548d423cf17448207a99299cf36fc1a95f6) Signed-off-by: Sean Owen --- python/pyspark/conf.py | 4 +- python/pyspark/context.py | 8 ++-- python/pyspark/ml/classification.py | 45 +++++++++--------- python/pyspark/ml/clustering.py | 8 ++-- python/pyspark/ml/feature.py | 13 +++--- python/pyspark/ml/linalg/__init__.py | 11 +++-- python/pyspark/ml/regression.py | 32 ++++++------- python/pyspark/mllib/clustering.py | 6 +-- python/pyspark/mllib/feature.py | 24 +++++----- python/pyspark/mllib/linalg/__init__.py | 11 +++-- python/pyspark/mllib/linalg/distributed.py | 15 +++--- python/pyspark/mllib/regression.py | 2 +- python/pyspark/mllib/stat/_statistics.py | 3 +- python/pyspark/mllib/tree.py | 12 ++--- python/pyspark/rdd.py | 54 +++++++++++----------- python/pyspark/sql/dataframe.py | 28 ++++++----- python/pyspark/sql/functions.py | 11 +++-- python/pyspark/sql/streaming.py | 10 ++-- python/pyspark/streaming/context.py | 2 +- python/pyspark/streaming/kinesis.py | 4 +- 20 files changed, 157 insertions(+), 146 deletions(-) diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py index 64b6f238e9c32..491b3a81972bc 100644 --- a/python/pyspark/conf.py +++ b/python/pyspark/conf.py @@ -90,8 +90,8 @@ class SparkConf(object): All setter methods in this class support chaining. For example, you can write C{conf.setMaster("local").setAppName("My app")}. - Note that once a SparkConf object is passed to Spark, it is cloned - and can no longer be modified by the user. + .. note:: Once a SparkConf object is passed to Spark, it is cloned + and can no longer be modified by the user. """ def __init__(self, loadDefaults=True, _jvm=None, _jconf=None): diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 2c2cf6a373bb7..2fd3aee01d76c 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -520,8 +520,8 @@ def wholeTextFiles(self, path, minPartitions=None, use_unicode=True): ... (a-hdfs-path/part-nnnnn, its content) - NOTE: Small files are preferred, as each file will be loaded - fully in memory. + .. note:: Small files are preferred, as each file will be loaded + fully in memory. >>> dirPath = os.path.join(tempdir, "files") >>> os.mkdir(dirPath) @@ -547,8 +547,8 @@ def binaryFiles(self, path, minPartitions=None): in a key-value pair, where the key is the path of each file, the value is the content of each file. - Note: Small files are preferred, large file is also allowable, but - may cause bad performance. + .. note:: Small files are preferred, large file is also allowable, but + may cause bad performance. 
""" minPartitions = minPartitions or self.defaultMinPartitions return RDD(self._jsc.binaryFiles(path, minPartitions), self, diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 83e1e89347660..8054a34db30f2 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -440,9 +440,9 @@ def roc(self): .. seealso:: `Wikipedia reference \ `_ - Note: This ignores instance weights (setting all to 1.0) from - `LogisticRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LogisticRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("roc") @@ -453,9 +453,9 @@ def areaUnderROC(self): Computes the area under the receiver operating characteristic (ROC) curve. - Note: This ignores instance weights (setting all to 1.0) from - `LogisticRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LogisticRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("areaUnderROC") @@ -467,9 +467,9 @@ def pr(self): containing two fields recall, precision with (0.0, 1.0) prepended to it. - Note: This ignores instance weights (setting all to 1.0) from - `LogisticRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LogisticRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("pr") @@ -480,9 +480,9 @@ def fMeasureByThreshold(self): Returns a dataframe with two fields (threshold, F-Measure) curve with beta = 1.0. - Note: This ignores instance weights (setting all to 1.0) from - `LogisticRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LogisticRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("fMeasureByThreshold") @@ -494,9 +494,9 @@ def precisionByThreshold(self): Every possible probability obtained in transforming the dataset are used as thresholds used in calculating the precision. - Note: This ignores instance weights (setting all to 1.0) from - `LogisticRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LogisticRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("precisionByThreshold") @@ -508,9 +508,9 @@ def recallByThreshold(self): Every possible probability obtained in transforming the dataset are used as thresholds used in calculating the recall. - Note: This ignores instance weights (setting all to 1.0) from - `LogisticRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LogisticRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("recallByThreshold") @@ -695,9 +695,9 @@ def featureImportances(self): where gain is scaled by the number of instances passing through node - Normalize importances for tree to sum to 1. - Note: Feature importance for single decision trees can have high variance due to - correlated predictor variables. Consider using a :py:class:`RandomForestClassifier` - to determine feature importance instead. + .. 
note:: Feature importance for single decision trees can have high variance due to + correlated predictor variables. Consider using a :py:class:`RandomForestClassifier` + to determine feature importance instead. """ return self._call_java("featureImportances") @@ -839,7 +839,6 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol `Gradient-Boosted Trees (GBTs) `_ learning algorithm for classification. It supports binary labels, as well as both continuous and categorical features. - Note: Multiclass labels are not currently supported. The implementation is based upon: J.H. Friedman. "Stochastic Gradient Boosting." 1999. @@ -851,6 +850,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol - We expect to implement TreeBoost in the future: `SPARK-4240 `_ + .. note:: Multiclass labels are not currently supported. + >>> from numpy import allclose >>> from pyspark.ml.linalg import Vectors >>> from pyspark.ml.feature import StringIndexer diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index e58ec1e7ac296..b29b5ac70e6fe 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -155,7 +155,7 @@ class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte While this process is generally guaranteed to converge, it is not guaranteed to find a global optimum. - Note: For high-dimensional data (with many features), this algorithm may perform poorly. + .. note:: For high-dimensional data (with many features), this algorithm may perform poorly. This is due to high-dimensional data (a) making it difficult to cluster at all (based on statistical/theoretical arguments) and (b) numerical issues with Gaussian distributions. @@ -749,9 +749,9 @@ def getCheckpointFiles(self): If using checkpointing and :py:attr:`LDA.keepLastCheckpoint` is set to true, then there may be saved checkpoint files. This method is provided so that users can manage those files. - Note that removing the checkpoints can cause failures if a partition is lost and is needed - by certain :py:class:`DistributedLDAModel` methods. Reference counting will clean up the - checkpoints when this model and derivative data go out of scope. + .. note:: Removing the checkpoints can cause failures if a partition is lost and is needed + by certain :py:class:`DistributedLDAModel` methods. Reference counting will clean up + the checkpoints when this model and derivative data go out of scope. :return List of checkpoint files from training """ diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 635cf1304588e..40b63d4d31d4b 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -742,8 +742,8 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav For the case E_max == E_min, Rescaled(e_i) = 0.5 * (max + min) - Note that since zero values will probably be transformed to non-zero values, output of the - transformer will be DenseVector even for sparse input. + .. note:: Since zero values will probably be transformed to non-zero values, output of the + transformer will be DenseVector even for sparse input. >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"]) @@ -1014,9 +1014,9 @@ class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, :py:attr:`dropLast`) because it makes the vector entries sum up to one, and hence linearly dependent. 
So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. - Note that this is different from scikit-learn's OneHotEncoder, - which keeps all categories. - The output vectors are sparse. + + .. note:: This is different from scikit-learn's OneHotEncoder, + which keeps all categories. The output vectors are sparse. .. seealso:: @@ -1698,7 +1698,8 @@ def getLabels(self): class StopWordsRemover(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): """ A feature transformer that filters out stop words from input. - Note: null values from input array are preserved unless adding null to stopWords explicitly. + + .. note:: null values from input array are preserved unless adding null to stopWords explicitly. >>> df = spark.createDataFrame([(["a", "b", "c"],)], ["text"]) >>> remover = StopWordsRemover(inputCol="text", outputCol="words", stopWords=["b"]) diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py index a5df727fdb418..1705c156ce4c8 100644 --- a/python/pyspark/ml/linalg/__init__.py +++ b/python/pyspark/ml/linalg/__init__.py @@ -746,11 +746,12 @@ def __hash__(self): class Vectors(object): """ - Factory methods for working with vectors. Note that dense vectors - are simply represented as NumPy array objects, so there is no need - to covert them for use in MLlib. For sparse vectors, the factory - methods in this class create an MLlib-compatible type, or users - can pass in SciPy's C{scipy.sparse} column vectors. + Factory methods for working with vectors. + + .. note:: Dense vectors are simply represented as NumPy array objects, + so there is no need to covert them for use in MLlib. For sparse vectors, + the factory methods in this class create an MLlib-compatible type, or users + can pass in SciPy's C{scipy.sparse} column vectors. """ @staticmethod diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 385391ba53fd4..b42e807069802 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -245,9 +245,9 @@ def explainedVariance(self): .. seealso:: `Wikipedia explain variation \ `_ - Note: This ignores instance weights (setting all to 1.0) from - `LinearRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("explainedVariance") @@ -259,9 +259,9 @@ def meanAbsoluteError(self): corresponding to the expected value of the absolute error loss or l1-norm loss. - Note: This ignores instance weights (setting all to 1.0) from - `LinearRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("meanAbsoluteError") @@ -273,9 +273,9 @@ def meanSquaredError(self): corresponding to the expected value of the squared error loss or quadratic loss. - Note: This ignores instance weights (setting all to 1.0) from - `LinearRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("meanSquaredError") @@ -286,9 +286,9 @@ def rootMeanSquaredError(self): Returns the root mean squared error, which is defined as the square root of the mean squared error. 
- Note: This ignores instance weights (setting all to 1.0) from - `LinearRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("rootMeanSquaredError") @@ -301,9 +301,9 @@ def r2(self): .. seealso:: `Wikipedia coefficient of determination \ ` - Note: This ignores instance weights (setting all to 1.0) from - `LinearRegression.weightCol`. This will change in later Spark - versions. + .. note:: This ignores instance weights (setting all to 1.0) from + `LinearRegression.weightCol`. This will change in later Spark + versions. """ return self._call_java("r2") @@ -822,7 +822,7 @@ def featureImportances(self): where gain is scaled by the number of instances passing through node - Normalize importances for tree to sum to 1. - Note: Feature importance for single decision trees can have high variance due to + .. note:: Feature importance for single decision trees can have high variance due to correlated predictor variables. Consider using a :py:class:`RandomForestRegressor` to determine feature importance instead. """ diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 2036168e456fd..91123ace3387e 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -699,9 +699,9 @@ class StreamingKMeansModel(KMeansModel): * n_t+1: New number of weights. * a: Decay Factor, which gives the forgetfulness. - Note that if a is set to 1, it is the weighted mean of the previous - and new data. If it set to zero, the old centroids are completely - forgotten. + .. note:: If a is set to 1, it is the weighted mean of the previous + and new data. If it set to zero, the old centroids are completely + forgotten. :param clusterCenters: Initial cluster centers. diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 7eaa2282cb8bb..bde0f67be775c 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -114,9 +114,9 @@ def transform(self, vector): """ Applies transformation on a vector or an RDD[Vector]. - Note: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. note:: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. :param vector: Vector or RDD of Vector to be transformed. """ @@ -139,9 +139,9 @@ def transform(self, vector): """ Applies standardization transformation on a vector. - Note: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. note:: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. :param vector: Vector or RDD of Vector to be standardized. :return: Standardized vector. If the variance of a column is @@ -407,7 +407,7 @@ class HashingTF(object): Maps a sequence of terms to their term frequencies using the hashing trick. - Note: the terms must be hashable (can not be dict/set/list...). + .. note:: The terms must be hashable (can not be dict/set/list...). :param numFeatures: number of features (default: 2^20) @@ -469,9 +469,9 @@ def transform(self, x): the terms which occur in fewer than `minDocFreq` documents will have an entry of 0. 
- Note: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. note:: In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. :param x: an RDD of term frequency vectors or a term frequency vector @@ -551,7 +551,7 @@ def transform(self, word): """ Transforms a word to its vector representation - Note: local use only + .. note:: Local use only :param word: a word :return: vector representation of word(s) @@ -570,7 +570,7 @@ def findSynonyms(self, word, num): :param num: number of synonyms to find :return: array of (word, cosineSimilarity) - Note: local use only + .. note:: Local use only """ if not isinstance(word, basestring): word = _convert_to_vector(word) diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index d37e715c8d8ec..031f22c02098e 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -835,11 +835,12 @@ def __hash__(self): class Vectors(object): """ - Factory methods for working with vectors. Note that dense vectors - are simply represented as NumPy array objects, so there is no need - to covert them for use in MLlib. For sparse vectors, the factory - methods in this class create an MLlib-compatible type, or users - can pass in SciPy's C{scipy.sparse} column vectors. + Factory methods for working with vectors. + + .. note:: Dense vectors are simply represented as NumPy array objects, + so there is no need to covert them for use in MLlib. For sparse vectors, + the factory methods in this class create an MLlib-compatible type, or users + can pass in SciPy's C{scipy.sparse} column vectors. """ @staticmethod diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index 538cada7d163d..600655c912ca6 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -171,8 +171,9 @@ def computeColumnSummaryStatistics(self): def computeCovariance(self): """ Computes the covariance matrix, treating each row as an - observation. Note that this cannot be computed on matrices - with more than 65535 columns. + observation. + + .. note:: This cannot be computed on matrices with more than 65535 columns. >>> rows = sc.parallelize([[1, 2], [2, 1]]) >>> mat = RowMatrix(rows) @@ -185,8 +186,9 @@ def computeCovariance(self): @since('2.0.0') def computeGramianMatrix(self): """ - Computes the Gramian matrix `A^T A`. Note that this cannot be - computed on matrices with more than 65535 columns. + Computes the Gramian matrix `A^T A`. + + .. note:: This cannot be computed on matrices with more than 65535 columns. >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -458,8 +460,9 @@ def columnSimilarities(self): @since('2.0.0') def computeGramianMatrix(self): """ - Computes the Gramian matrix `A^T A`. Note that this cannot be - computed on matrices with more than 65535 columns. + Computes the Gramian matrix `A^T A`. + + .. note:: This cannot be computed on matrices with more than 65535 columns. >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... 
IndexedRow(1, [4, 5, 6])]) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 705022934e41b..1b66f5b51044b 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -44,7 +44,7 @@ class LabeledPoint(object): Vector of features for this point (NumPy array, list, pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). - Note: 'label' and 'features' are accessible as class attributes. + .. note:: 'label' and 'features' are accessible as class attributes. .. versionadded:: 1.0.0 """ diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 67d5f0e44f41c..49b26446dbc32 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -164,7 +164,6 @@ def chiSqTest(observed, expected=None): of fit test of the observed data against the expected distribution, or againt the uniform distribution (by default), with each category having an expected frequency of `1 / len(observed)`. - (Note: `observed` cannot contain negative values) If `observed` is matrix, conduct Pearson's independence test on the input contingency matrix, which cannot contain negative entries or @@ -176,6 +175,8 @@ def chiSqTest(observed, expected=None): contingency matrix for which the chi-squared statistic is computed. All label and feature values must be categorical. + .. note:: `observed` cannot contain negative values + :param observed: it could be a vector containing the observed categorical counts/relative frequencies, or the contingency matrix (containing either counts or relative frequencies), diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index b3011d42e56af..a6089fc8b9d32 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -40,9 +40,9 @@ def predict(self, x): Predict values for a single data point or an RDD of points using the model trained. - Note: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. note:: In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. """ if isinstance(x, RDD): return self.call("predict", x.map(_convert_to_vector)) @@ -85,9 +85,9 @@ def predict(self, x): """ Predict the label of one or more examples. - Note: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. note:: In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. :param x: Data point (feature vector), or an RDD of data points (feature diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index f21a364df9100..9e05da89af082 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -417,10 +417,8 @@ def sample(self, withReplacement, fraction, seed=None): with replacement: expected number of times each element is chosen; fraction must be >= 0 :param seed: seed for the random number generator - .. note:: - - This is not guaranteed to provide exactly the fraction specified of the total count - of the given :class:`DataFrame`. + .. note:: This is not guaranteed to provide exactly the fraction specified of the total + count of the given :class:`DataFrame`. 
>>> rdd = sc.parallelize(range(100), 4) >>> 6 <= rdd.sample(False, 0.1, 81).count() <= 14 @@ -460,8 +458,8 @@ def takeSample(self, withReplacement, num, seed=None): """ Return a fixed-size sampled subset of this RDD. - Note that this method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + .. note:: This method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. >>> rdd = sc.parallelize(range(0, 10)) >>> len(rdd.takeSample(True, 20, 1)) @@ -572,7 +570,7 @@ def intersection(self, other): Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did. - Note that this method performs a shuffle internally. + .. note:: This method performs a shuffle internally. >>> rdd1 = sc.parallelize([1, 10, 2, 3, 4, 5]) >>> rdd2 = sc.parallelize([1, 6, 2, 3, 7, 8]) @@ -803,8 +801,9 @@ def func(it): def collect(self): """ Return a list that contains all of the elements in this RDD. - Note that this method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + + .. note:: This method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. """ with SCCallSiteSync(self.context) as css: port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd()) @@ -1251,10 +1250,10 @@ def top(self, num, key=None): """ Get the top N elements from an RDD. - Note that this method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + .. note:: This method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. - Note: It returns the list sorted in descending order. + .. note:: It returns the list sorted in descending order. >>> sc.parallelize([10, 4, 2, 12, 3]).top(1) [12] @@ -1276,8 +1275,8 @@ def takeOrdered(self, num, key=None): Get the N elements from an RDD ordered in ascending order or as specified by the optional key function. - Note that this method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + .. note:: this method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. >>> sc.parallelize([10, 1, 2, 9, 3, 4, 5, 6, 7]).takeOrdered(6) [1, 2, 3, 4, 5, 6] @@ -1298,11 +1297,11 @@ def take(self, num): that partition to estimate the number of additional partitions needed to satisfy the limit. - Note that this method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. - Translated from the Scala implementation in RDD#take(). + .. note:: this method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. + >>> sc.parallelize([2, 3, 4, 5, 6]).cache().take(2) [2, 3] >>> sc.parallelize([2, 3, 4, 5, 6]).take(10) @@ -1366,8 +1365,9 @@ def first(self): def isEmpty(self): """ - Returns true if and only if the RDD contains no elements at all. Note that an RDD - may be empty even when it has at least 1 partition. + Returns true if and only if the RDD contains no elements at all. + + .. note:: an RDD may be empty even when it has at least 1 partition. 
>>> sc.parallelize([]).isEmpty() True @@ -1558,8 +1558,8 @@ def collectAsMap(self): """ Return the key-value pairs in this RDD to the master as a dictionary. - Note that this method should only be used if the resulting data is expected - to be small, as all the data is loaded into the driver's memory. + .. note:: this method should only be used if the resulting data is expected + to be small, as all the data is loaded into the driver's memory. >>> m = sc.parallelize([(1, 2), (3, 4)]).collectAsMap() >>> m[1] @@ -1796,8 +1796,7 @@ def combineByKey(self, createCombiner, mergeValue, mergeCombiners, set of aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], for a "combined - type" C. Note that V and C can be different -- for example, one might - group an RDD of type (Int, Int) into an RDD of type (Int, List[Int]). + type" C. Users provide three functions: @@ -1809,6 +1808,9 @@ def combineByKey(self, createCombiner, mergeValue, mergeCombiners, In addition, users can control the partitioning of the output RDD. + .. note:: V and C can be different -- for example, one might group an RDD of type + (Int, Int) into an RDD of type (Int, List[Int]). + >>> x = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) >>> def add(a, b): return a + str(b) >>> sorted(x.combineByKey(str, add, add).collect()) @@ -1880,9 +1882,9 @@ def groupByKey(self, numPartitions=None, partitionFunc=portable_hash): Group the values for each key in the RDD into a single sequence. Hash-partitions the resulting RDD with numPartitions partitions. - Note: If you are grouping in order to perform an aggregation (such as a - sum or average) over each key, using reduceByKey or aggregateByKey will - provide much better performance. + .. note:: If you are grouping in order to perform an aggregation (such as a + sum or average) over each key, using reduceByKey or aggregateByKey will + provide much better performance. >>> rdd = sc.parallelize([("a", 1), ("b", 1), ("a", 1)]) >>> sorted(rdd.groupByKey().mapValues(len).collect()) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 6fe622643291e..b9d90384e3e2c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -457,7 +457,7 @@ def foreachPartition(self, f): def cache(self): """Persists the :class:`DataFrame` with the default storage level (C{MEMORY_AND_DISK}). - .. note:: the default storage level has changed to C{MEMORY_AND_DISK} to match Scala in 2.0. + .. note:: The default storage level has changed to C{MEMORY_AND_DISK} to match Scala in 2.0. """ self.is_cached = True self._jdf.cache() @@ -470,7 +470,7 @@ def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK): a new storage level if the :class:`DataFrame` does not have a storage level set yet. If no storage level is specified defaults to (C{MEMORY_AND_DISK}). - .. note:: the default storage level has changed to C{MEMORY_AND_DISK} to match Scala in 2.0. + .. note:: The default storage level has changed to C{MEMORY_AND_DISK} to match Scala in 2.0. """ self.is_cached = True javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) @@ -597,10 +597,8 @@ def distinct(self): def sample(self, withReplacement, fraction, seed=None): """Returns a sampled subset of this :class:`DataFrame`. - .. note:: - - This is not guaranteed to provide exactly the fraction specified of the total count - of the given :class:`DataFrame`. + .. note:: This is not guaranteed to provide exactly the fraction specified of the total + count of the given :class:`DataFrame`. 
>>> df.sample(False, 0.5, 42).count() 2 @@ -866,8 +864,8 @@ def describe(self, *cols): This include count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or string columns. - .. note:: This function is meant for exploratory data analysis, as we make no \ - guarantee about the backward compatibility of the schema of the resulting DataFrame. + .. note:: This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting DataFrame. >>> df.describe(['age']).show() +-------+------------------+ @@ -900,8 +898,8 @@ def describe(self, *cols): def head(self, n=None): """Returns the first ``n`` rows. - Note that this method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + .. note:: This method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. :param n: int, default 1. Number of rows to return. :return: If n is greater than 1, return a list of :class:`Row`. @@ -1462,8 +1460,8 @@ def freqItems(self, cols, support=None): "http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". :func:`DataFrame.freqItems` and :func:`DataFrameStatFunctions.freqItems` are aliases. - .. note:: This function is meant for exploratory data analysis, as we make no \ - guarantee about the backward compatibility of the schema of the resulting DataFrame. + .. note:: This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting DataFrame. :param cols: Names of the columns to calculate frequent items for as a list or tuple of strings. @@ -1564,11 +1562,11 @@ def toDF(self, *cols): def toPandas(self): """Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. - Note that this method should only be used if the resulting Pandas's DataFrame is expected - to be small, as all the data is loaded into the driver's memory. - This is only available if Pandas is installed and available. + .. note:: This method should only be used if the resulting Pandas's DataFrame is expected + to be small, as all the data is loaded into the driver's memory. + >>> df.toPandas() # doctest: +SKIP age name 0 2 Alice diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 46a092f16d4fc..d8abafcde3846 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -359,7 +359,7 @@ def grouping_id(*cols): (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) - .. note:: the list of columns should match with grouping columns exactly, or empty (means all + .. note:: The list of columns should match with grouping columns exactly, or empty (means all the grouping columns). >>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show() @@ -547,7 +547,7 @@ def shiftRightUnsigned(col, numBits): def spark_partition_id(): """A column for partition ID. - Note that this is indeterministic because it depends on data partitioning and task scheduling. + .. note:: This is indeterministic because it depends on data partitioning and task scheduling. 
>>> df.repartition(1).select(spark_partition_id().alias("pid")).collect() [Row(pid=0), Row(pid=0)] @@ -1852,9 +1852,10 @@ def __call__(self, *cols): @since(1.3) def udf(f, returnType=StringType()): """Creates a :class:`Column` expression representing a user defined function (UDF). - Note that the user-defined functions must be deterministic. Due to optimization, - duplicate invocations may be eliminated or the function may even be invoked more times than - it is present in the query. + + .. note:: The user-defined functions must be deterministic. Due to optimization, + duplicate invocations may be eliminated or the function may even be invoked more times than + it is present in the query. :param f: python function :param returnType: a :class:`pyspark.sql.types.DataType` object diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 0e4589be976ea..9c3a237699f96 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -90,10 +90,12 @@ def awaitTermination(self, timeout=None): @since(2.0) def processAllAvailable(self): """Blocks until all available data in the source has been processed and committed to the - sink. This method is intended for testing. Note that in the case of continually arriving - data, this method may block forever. Additionally, this method is only guaranteed to block - until data that has been synchronously appended data to a stream source prior to invocation. - (i.e. `getOffset` must immediately reflect the addition). + sink. This method is intended for testing. + + .. note:: In the case of continually arriving data, this method may block forever. + Additionally, this method is only guaranteed to block until data that has been + synchronously appended data to a stream source prior to invocation. + (i.e. `getOffset` must immediately reflect the addition). """ return self._jsq.processAllAvailable() diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py index ec3ad9933cf60..17c34f8a1c54c 100644 --- a/python/pyspark/streaming/context.py +++ b/python/pyspark/streaming/context.py @@ -304,7 +304,7 @@ def queueStream(self, rdds, oneAtATime=True, default=None): Create an input stream from an queue of RDDs or list. In each batch, it will process either one or all of the RDDs returned by the queue. - NOTE: changes to the queue after the stream is created will not be recognized. + .. note:: Changes to the queue after the stream is created will not be recognized. @param rdds: Queue of RDDs @param oneAtATime: pick one rdd each time or pick all of them once. diff --git a/python/pyspark/streaming/kinesis.py b/python/pyspark/streaming/kinesis.py index 434ce83e1e6f9..3a8d8b819fd37 100644 --- a/python/pyspark/streaming/kinesis.py +++ b/python/pyspark/streaming/kinesis.py @@ -42,8 +42,8 @@ def createStream(ssc, kinesisAppName, streamName, endpointUrl, regionName, Create an input stream that pulls messages from a Kinesis stream. This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - Note: The given AWS credentials will get saved in DStream checkpoints if checkpointing is - enabled. Make sure that your checkpoint directory is secure. + .. note:: The given AWS credentials will get saved in DStream checkpoints if checkpointing + is enabled. Make sure that your checkpoint directory is secure. 
:param ssc: StreamingContext object :param kinesisAppName: Kinesis application name used by the Kinesis Client Library (KCL) to From 0e60e4b88014fcdd54acc650bfd3a1683f06f09e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 22 Nov 2016 09:16:20 -0800 Subject: [PATCH 0172/1204] [SPARK-18519][SQL] map type can not be used in EqualTo ## What changes were proposed in this pull request? Technically map type is not orderable, but can be used in equality comparison. However, due to the limitation of the current implementation, map type can't be used in equality comparison so that it can't be join key or grouping key. This PR makes this limitation explicit, to avoid wrong result. ## How was this patch tested? updated tests. Author: Wenchen Fan Closes #15956 from cloud-fan/map-type. (cherry picked from commit bb152cdfbb8d02130c71d2326ae81939725c2cf0) Signed-off-by: Herman van Hovell --- .../sql/catalyst/analysis/CheckAnalysis.scala | 15 ------- .../sql/catalyst/expressions/predicates.scala | 30 +++++++++++++ .../analysis/AnalysisErrorSuite.scala | 44 +++++++------------ .../ExpressionTypeCheckingSuite.scala | 2 + 4 files changed, 48 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 98e50d0d3c674..80e577e5c4c79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -183,21 +183,6 @@ trait CheckAnalysis extends PredicateHelper { s"join condition '${condition.sql}' " + s"of type ${condition.dataType.simpleString} is not a boolean.") - case j @ Join(_, _, _, Some(condition)) => - def checkValidJoinConditionExprs(expr: Expression): Unit = expr match { - case p: Predicate => - p.asInstanceOf[Expression].children.foreach(checkValidJoinConditionExprs) - case e if e.dataType.isInstanceOf[BinaryType] => - failAnalysis(s"binary type expression ${e.sql} cannot be used " + - "in join conditions") - case e if e.dataType.isInstanceOf[MapType] => - failAnalysis(s"map type expression ${e.sql} cannot be used " + - "in join conditions") - case _ => // OK - } - - checkValidJoinConditionExprs(condition) - case Aggregate(groupingExprs, aggregateExprs, child) => def checkValidAggregateExpression(expr: Expression): Unit = expr match { case aggExpr: AggregateExpression => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 7946c201f4ffc..2ad452b6a90ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -412,6 +412,21 @@ case class EqualTo(left: Expression, right: Expression) override def inputType: AbstractDataType = AnyDataType + override def checkInputDataTypes(): TypeCheckResult = { + super.checkInputDataTypes() match { + case TypeCheckResult.TypeCheckSuccess => + // TODO: although map type is not orderable, technically map type should be able to be used + // in equality comparison, remove this type check once we support it. 
+ if (left.dataType.existsRecursively(_.isInstanceOf[MapType])) { + TypeCheckResult.TypeCheckFailure("Cannot use map type in EqualTo, but the actual " + + s"input type is ${left.dataType.catalogString}.") + } else { + TypeCheckResult.TypeCheckSuccess + } + case failure => failure + } + } + override def symbol: String = "=" protected override def nullSafeEval(input1: Any, input2: Any): Any = { @@ -440,6 +455,21 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp override def inputType: AbstractDataType = AnyDataType + override def checkInputDataTypes(): TypeCheckResult = { + super.checkInputDataTypes() match { + case TypeCheckResult.TypeCheckSuccess => + // TODO: although map type is not orderable, technically map type should be able to be used + // in equality comparison, remove this type check once we support it. + if (left.dataType.existsRecursively(_.isInstanceOf[MapType])) { + TypeCheckResult.TypeCheckFailure("Cannot use map type in EqualNullSafe, but the actual " + + s"input type is ${left.dataType.catalogString}.") + } else { + TypeCheckResult.TypeCheckSuccess + } + case failure => failure + } + } + override def symbol: String = "<=>" override def nullable: Boolean = false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 21afe9fec5944..8c1faea2394c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -465,34 +465,22 @@ class AnalysisErrorSuite extends AnalysisTest { "another aggregate function." :: Nil) } - test("Join can't work on binary and map types") { - val plan = - Join( - LocalRelation( - AttributeReference("a", BinaryType)(exprId = ExprId(2)), - AttributeReference("b", IntegerType)(exprId = ExprId(1))), - LocalRelation( - AttributeReference("c", BinaryType)(exprId = ExprId(4)), - AttributeReference("d", IntegerType)(exprId = ExprId(3))), - Cross, - Some(EqualTo(AttributeReference("a", BinaryType)(exprId = ExprId(2)), - AttributeReference("c", BinaryType)(exprId = ExprId(4))))) - - assertAnalysisError(plan, "binary type expression `a` cannot be used in join conditions" :: Nil) - - val plan2 = - Join( - LocalRelation( - AttributeReference("a", MapType(IntegerType, StringType))(exprId = ExprId(2)), - AttributeReference("b", IntegerType)(exprId = ExprId(1))), - LocalRelation( - AttributeReference("c", MapType(IntegerType, StringType))(exprId = ExprId(4)), - AttributeReference("d", IntegerType)(exprId = ExprId(3))), - Cross, - Some(EqualTo(AttributeReference("a", MapType(IntegerType, StringType))(exprId = ExprId(2)), - AttributeReference("c", MapType(IntegerType, StringType))(exprId = ExprId(4))))) - - assertAnalysisError(plan2, "map type expression `a` cannot be used in join conditions" :: Nil) + test("Join can work on binary types but can't work on map types") { + val left = LocalRelation('a.binary, 'b.map(StringType, StringType)) + val right = LocalRelation('c.binary, 'd.map(StringType, StringType)) + + val plan1 = left.join( + right, + joinType = Cross, + condition = Some('a === 'c)) + + assertAnalysisSuccess(plan1) + + val plan2 = left.join( + right, + joinType = Cross, + condition = Some('b === 'd)) + assertAnalysisError(plan2, "Cannot use map type in EqualTo" :: Nil) } test("PredicateSubQuery is used outside of a filter") { diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 542e654bbce12..744057b7c5f4c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -111,6 +111,8 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite { assertErrorForDifferingTypes(GreaterThan('intField, 'booleanField)) assertErrorForDifferingTypes(GreaterThanOrEqual('intField, 'booleanField)) + assertError(EqualTo('mapField, 'mapField), "Cannot use map type in EqualTo") + assertError(EqualNullSafe('mapField, 'mapField), "Cannot use map type in EqualNullSafe") assertError(LessThan('mapField, 'mapField), s"requires ${TypeCollection.Ordered.simpleString} type") assertError(LessThanOrEqual('mapField, 'mapField), From 0e624e990b3b426dba0a6149ad6340f85d214a58 Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Tue, 22 Nov 2016 12:06:21 -0800 Subject: [PATCH 0173/1204] [SPARK-18504][SQL] Scalar subquery with extra group by columns returning incorrect result ## What changes were proposed in this pull request? This PR blocks an incorrect result scenario in scalar subquery where there are GROUP BY column(s) that are not part of the correlated predicate(s). Example: // Incorrect result Seq(1).toDF("c1").createOrReplaceTempView("t1") Seq((1,1),(1,2)).toDF("c1","c2").createOrReplaceTempView("t2") sql("select (select sum(-1) from t2 where t1.c1=t2.c1 group by t2.c2) from t1").show // How can selecting a scalar subquery from a 1-row table return 2 rows? ## How was this patch tested? sql/test, catalyst/test new test case covering the reported problem is added to SubquerySuite.scala Author: Nattavut Sutyanyong Closes #15936 from nsyca/scalarSubqueryIncorrect-1. 
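For readers who want to see the effect of the new check, here is a minimal spark-shell style sketch based on the reproduction above; the `spark` session and the temp view setup are assumptions for illustration and are not part of this patch, while the quoted error text comes from the new CheckAnalysis rule below.

```scala
import org.apache.spark.sql.AnalysisException
import spark.implicits._

Seq(1).toDF("c1").createOrReplaceTempView("t1")
Seq((1, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t2")

// Grouping only by the correlated column is still allowed: the scalar subquery
// produces at most one row per outer row of t1.
spark.sql(
  "select (select sum(-1) from t2 where t1.c1 = t2.c1 group by t2.c1) from t1").show()

// Grouping by t2.c2, which does not appear in the correlated predicate, is now
// rejected at analysis time instead of silently returning two rows.
try {
  spark.sql(
    "select (select sum(-1) from t2 where t1.c1 = t2.c1 group by t2.c2) from t1").show()
} catch {
  case e: AnalysisException =>
    // Expected to mention: "a GROUP BY clause in a scalar correlated subquery
    // cannot contain non-correlated columns"
    println(e.getMessage)
}
```

The rule enforced by this patch is that every GROUP BY column of the correlated aggregate must also be referenced by the correlated predicates, which is what guarantees at most one row per outer row.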
(cherry picked from commit 45ea46b7b397f023b4da878eb11e21b08d931115) Signed-off-by: Herman van Hovell --- .../sql/catalyst/analysis/Analyzer.scala | 3 -- .../sql/catalyst/analysis/CheckAnalysis.scala | 30 +++++++++++++++---- .../org/apache/spark/sql/SubquerySuite.scala | 12 ++++++++ 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b7e167557c559..2918e9d158829 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1182,9 +1182,6 @@ class Analyzer( */ private def resolveSubQueries(plan: LogicalPlan, plans: Seq[LogicalPlan]): LogicalPlan = { plan transformExpressions { - case s @ ScalarSubquery(sub, conditions, exprId) - if sub.resolved && conditions.isEmpty && sub.output.size != 1 => - failAnalysis(s"Scalar subquery must return only one column, but got ${sub.output.size}") case s @ ScalarSubquery(sub, _, exprId) if !sub.resolved => resolveSubQuery(s, plans, 1)(ScalarSubquery(_, _, exprId)) case e @ Exists(sub, exprId) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 80e577e5c4c79..26d26385904f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -117,19 +117,37 @@ trait CheckAnalysis extends PredicateHelper { failAnalysis(s"Window specification $s is not valid because $m") case None => w } + case s @ ScalarSubquery(query, conditions, _) + // If no correlation, the output must be exactly one column + if (conditions.isEmpty && query.output.size != 1) => + failAnalysis( + s"Scalar subquery must return only one column, but got ${query.output.size}") case s @ ScalarSubquery(query, conditions, _) if conditions.nonEmpty => - // Make sure correlated scalar subqueries contain one row for every outer row by - // enforcing that they are aggregates which contain exactly one aggregate expressions. - // The analyzer has already checked that subquery contained only one output column, and - // added all the grouping expressions to the aggregate. - def checkAggregate(a: Aggregate): Unit = { - val aggregates = a.expressions.flatMap(_.collect { + def checkAggregate(agg: Aggregate): Unit = { + // Make sure correlated scalar subqueries contain one row for every outer row by + // enforcing that they are aggregates which contain exactly one aggregate expressions. + // The analyzer has already checked that subquery contained only one output column, + // and added all the grouping expressions to the aggregate. 
+ val aggregates = agg.expressions.flatMap(_.collect { case a: AggregateExpression => a }) if (aggregates.isEmpty) { failAnalysis("The output of a correlated scalar subquery must be aggregated") } + + // SPARK-18504: block cases where GROUP BY columns + // are not part of the correlated columns + val groupByCols = ExpressionSet.apply(agg.groupingExpressions.flatMap(_.references)) + val predicateCols = ExpressionSet.apply(conditions.flatMap(_.references)) + val invalidCols = groupByCols.diff(predicateCols) + // GROUP BY columns must be a subset of columns in the predicates + if (invalidCols.nonEmpty) { + failAnalysis( + "a GROUP BY clause in a scalar correlated subquery " + + "cannot contain non-correlated columns: " + + invalidCols.mkString(",")) + } } // Skip projects and subquery aliases added by the Analyzer and the SQLBuilder. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index c84a6f161893c..f1dd1c620e660 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -483,6 +483,18 @@ class SubquerySuite extends QueryTest with SharedSQLContext { Row(1, null) :: Row(2, 6.0) :: Row(3, 2.0) :: Row(null, null) :: Row(6, null) :: Nil) } + test("SPARK-18504 extra GROUP BY column in correlated scalar subquery is not permitted") { + withTempView("t") { + Seq((1, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t") + + val errMsg = intercept[AnalysisException] { + sql("select (select sum(-1) from t t2 where t1.c2 = t2.c1 group by t2.c2) sum from t t1") + } + assert(errMsg.getMessage.contains( + "a GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns:")) + } + } + test("non-aggregated correlated scalar subquery") { val msg1 = intercept[AnalysisException] { sql("select a, (select b from l l2 where l2.a = l1.a) sum_b from l l1") From fa360134d06e5bfb423f0bd769edb47dbda1d9af Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 22 Nov 2016 15:25:22 -0500 Subject: [PATCH 0174/1204] [SPARK-18507][SQL] HiveExternalCatalog.listPartitions should only call getTable once ## What changes were proposed in this pull request? HiveExternalCatalog.listPartitions should only call `getTable` once, instead of calling it for every partitions. ## How was this patch tested? N/A Author: Wenchen Fan Closes #15978 from cloud-fan/perf. 
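As a small illustration of why this matters, here is a self-contained toy sketch (not Spark code; all names are made up) of the pattern this patch applies: hoisting a loop-invariant, expensive lookup out of a per-partition transformation so it runs once rather than once per partition.

```scala
object HoistInvariantLookup {
  // Stand-in for the metastore round trip that a table lookup performs.
  var metastoreCalls = 0
  def partitionColumnNames(): Seq[String] = { metastoreCalls += 1; Seq("ds", "hr") }

  def main(args: Array[String]): Unit = {
    val partitions = Seq(Map("ds" -> "2016-11-22"), Map("ds" -> "2016-11-23"))

    // Before: the lookup runs inside the map, once per partition.
    metastoreCalls = 0
    partitions.map(p => (p, partitionColumnNames()))
    println(s"lookups inside map: $metastoreCalls")   // 2

    // After: the lookup is hoisted and runs exactly once, mirroring the patched
    // listPartitions in the diff below.
    metastoreCalls = 0
    val cols = partitionColumnNames()
    partitions.map(p => (p, cols))
    println(s"lookups hoisted:    $metastoreCalls")   // 1
  }
}
```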
(cherry picked from commit 702cd403fc8e5ce8281fe8828197ead46bdb8832) Signed-off-by: Andrew Or --- .../scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 5dbb4024bbee0..ff0923f04893d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -907,8 +907,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = withClient { + val actualPartColNames = getTable(db, table).partitionColumnNames client.getPartitions(db, table, partialSpec.map(lowerCasePartitionSpec)).map { part => - part.copy(spec = restorePartitionSpec(part.spec, getTable(db, table).partitionColumnNames)) + part.copy(spec = restorePartitionSpec(part.spec, actualPartColNames)) } } From fb2ea54a69b521463b93b270b63081da726ee036 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 22 Nov 2016 13:03:50 -0800 Subject: [PATCH 0175/1204] [SPARK-18465] Add 'IF EXISTS' clause to 'UNCACHE' to not throw exceptions when table doesn't exist ## What changes were proposed in this pull request? While this behavior is debatable, consider the following use case: ```sql UNCACHE TABLE foo; CACHE TABLE foo AS SELECT * FROM bar ``` The command above fails the first time you run it. But I want to run the command above over and over again, and I don't want to change my code just for the first run of it. The issue is that subsequent `CACHE TABLE` commands do not overwrite the existing table. Now we can do: ```sql UNCACHE TABLE IF EXISTS foo; CACHE TABLE foo AS SELECT * FROM bar ``` ## How was this patch tested? Unit tests Author: Burak Yavuz Closes #15896 from brkyvz/uncache. (cherry picked from commit bdc8153e8689262708c7fade5c065bd7fc8a84fc) Signed-off-by: Herman van Hovell --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../apache/spark/sql/execution/SparkSqlParser.scala | 2 +- .../apache/spark/sql/execution/command/cache.scala | 12 ++++++++++-- .../org/apache/spark/sql/hive/CachedTableSuite.scala | 5 ++++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index b599a884957a8..0aa2a97407c53 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -142,7 +142,7 @@ statement | REFRESH TABLE tableIdentifier #refreshTable | REFRESH .*? #refreshResource | CACHE LAZY? TABLE tableIdentifier (AS? query)? #cacheTable - | UNCACHE TABLE tableIdentifier #uncacheTable + | UNCACHE TABLE (IF EXISTS)? tableIdentifier #uncacheTable | CLEAR CACHE #clearCache | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE tableIdentifier partitionSpec? 
#loadData diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index b8be3d17ba444..47610453ac23a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -233,7 +233,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { * Create an [[UncacheTableCommand]] logical plan. */ override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { - UncacheTableCommand(visitTableIdentifier(ctx.tableIdentifier)) + UncacheTableCommand(visitTableIdentifier(ctx.tableIdentifier), ctx.EXISTS != null) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index c31f4dc9aba4b..336f14dd97aea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -49,10 +50,17 @@ case class CacheTableCommand( } -case class UncacheTableCommand(tableIdent: TableIdentifier) extends RunnableCommand { +case class UncacheTableCommand( + tableIdent: TableIdentifier, + ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { - sparkSession.catalog.uncacheTable(tableIdent.quotedString) + val tableId = tableIdent.quotedString + try { + sparkSession.catalog.uncacheTable(tableId) + } catch { + case _: NoSuchTableException if ifExists => // don't throw + } Seq.empty[Row] } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index fc35304c80ecc..3871b3d785882 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -101,13 +101,16 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto sql("DROP TABLE IF EXISTS nonexistantTable") } - test("correct error on uncache of nonexistant tables") { + test("uncache of nonexistant tables") { + // make sure table doesn't exist + intercept[NoSuchTableException](spark.table("nonexistantTable")) intercept[NoSuchTableException] { spark.catalog.uncacheTable("nonexistantTable") } intercept[NoSuchTableException] { sql("UNCACHE TABLE nonexistantTable") } + sql("UNCACHE TABLE IF EXISTS nonexistantTable") } test("no error on uncache of non-cached table") { From bd338f60d7f30f0cb735dffb39b3a6ec60766301 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Tue, 22 Nov 2016 14:15:57 -0800 Subject: [PATCH 0176/1204] [SPARK-18373][SPARK-18529][SS][KAFKA] Make failOnDataLoss=false work with Spark jobs ## What changes were proposed in this pull request? This PR adds `CachedKafkaConsumer.getAndIgnoreLostData` to handle corner cases of `failOnDataLoss=false`. 
It also resolves [SPARK-18529](https://issues.apache.org/jira/browse/SPARK-18529) after refactoring codes: Timeout will throw a TimeoutException. ## How was this patch tested? Because I cannot find any way to manually control the Kafka server to clean up logs, it's impossible to write unit tests for each corner case. Therefore, I just created `test("stress test for failOnDataLoss=false")` which should cover most of corner cases. I also modified some existing tests to test for both `failOnDataLoss=false` and `failOnDataLoss=true` to make sure it doesn't break existing logic. Author: Shixiong Zhu Closes #15820 from zsxwing/failOnDataLoss. (cherry picked from commit 2fd101b2f0028e005fbb0bdd29e59af37aa637da) Signed-off-by: Tathagata Das --- .../sql/kafka010/CachedKafkaConsumer.scala | 236 ++++++++++++-- .../spark/sql/kafka010/KafkaSource.scala | 23 +- .../spark/sql/kafka010/KafkaSourceRDD.scala | 42 ++- .../spark/sql/kafka010/KafkaSourceSuite.scala | 297 +++++++++++++++--- .../spark/sql/kafka010/KafkaTestUtils.scala | 20 +- 5 files changed, 523 insertions(+), 95 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/CachedKafkaConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/CachedKafkaConsumer.scala index 3b5a96534f9b6..3f438e99185b5 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/CachedKafkaConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/CachedKafkaConsumer.scala @@ -18,12 +18,16 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} +import java.util.concurrent.TimeoutException -import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer} +import scala.collection.JavaConverters._ + +import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer, OffsetOutOfRangeException} import org.apache.kafka.common.TopicPartition import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.internal.Logging +import org.apache.spark.sql.kafka010.KafkaSource._ /** @@ -34,10 +38,18 @@ import org.apache.spark.internal.Logging private[kafka010] case class CachedKafkaConsumer private( topicPartition: TopicPartition, kafkaParams: ju.Map[String, Object]) extends Logging { + import CachedKafkaConsumer._ private val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String] - private val consumer = { + private var consumer = createConsumer + + /** Iterator to the already fetch data */ + private var fetchedData = ju.Collections.emptyIterator[ConsumerRecord[Array[Byte], Array[Byte]]] + private var nextOffsetInFetchedData = UNKNOWN_OFFSET + + /** Create a KafkaConsumer to fetch records for `topicPartition` */ + private def createConsumer: KafkaConsumer[Array[Byte], Array[Byte]] = { val c = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParams) val tps = new ju.ArrayList[TopicPartition]() tps.add(topicPartition) @@ -45,42 +57,193 @@ private[kafka010] case class CachedKafkaConsumer private( c } - /** Iterator to the already fetch data */ - private var fetchedData = ju.Collections.emptyIterator[ConsumerRecord[Array[Byte], Array[Byte]]] - private var nextOffsetInFetchedData = -2L - /** - * Get the record for the given offset, waiting up to timeout ms if IO is necessary. - * Sequential forward access will use buffers, but random access will be horribly inefficient. + * Get the record for the given offset if available. 
Otherwise it will either throw error + * (if failOnDataLoss = true), or return the next available offset within [offset, untilOffset), + * or null. + * + * @param offset the offset to fetch. + * @param untilOffset the max offset to fetch. Exclusive. + * @param pollTimeoutMs timeout in milliseconds to poll data from Kafka. + * @param failOnDataLoss When `failOnDataLoss` is `true`, this method will either return record at + * offset if available, or throw exception.when `failOnDataLoss` is `false`, + * this method will either return record at offset if available, or return + * the next earliest available record less than untilOffset, or null. It + * will not throw any exception. */ - def get(offset: Long, pollTimeoutMs: Long): ConsumerRecord[Array[Byte], Array[Byte]] = { + def get( + offset: Long, + untilOffset: Long, + pollTimeoutMs: Long, + failOnDataLoss: Boolean): ConsumerRecord[Array[Byte], Array[Byte]] = { + require(offset < untilOffset, + s"offset must always be less than untilOffset [offset: $offset, untilOffset: $untilOffset]") logDebug(s"Get $groupId $topicPartition nextOffset $nextOffsetInFetchedData requested $offset") - if (offset != nextOffsetInFetchedData) { - logInfo(s"Initial fetch for $topicPartition $offset") - seek(offset) - poll(pollTimeoutMs) + // The following loop is basically for `failOnDataLoss = false`. When `failOnDataLoss` is + // `false`, first, we will try to fetch the record at `offset`. If no such record exists, then + // we will move to the next available offset within `[offset, untilOffset)` and retry. + // If `failOnDataLoss` is `true`, the loop body will be executed only once. + var toFetchOffset = offset + while (toFetchOffset != UNKNOWN_OFFSET) { + try { + return fetchData(toFetchOffset, pollTimeoutMs) + } catch { + case e: OffsetOutOfRangeException => + // When there is some error thrown, it's better to use a new consumer to drop all cached + // states in the old consumer. We don't need to worry about the performance because this + // is not a common path. + resetConsumer() + reportDataLoss(failOnDataLoss, s"Cannot fetch offset $toFetchOffset", e) + toFetchOffset = getEarliestAvailableOffsetBetween(toFetchOffset, untilOffset) + } } + resetFetchedData() + null + } - if (!fetchedData.hasNext()) { poll(pollTimeoutMs) } - assert(fetchedData.hasNext(), - s"Failed to get records for $groupId $topicPartition $offset " + - s"after polling for $pollTimeoutMs") - var record = fetchedData.next() + /** + * Return the next earliest available offset in [offset, untilOffset). If all offsets in + * [offset, untilOffset) are invalid (e.g., the topic is deleted and recreated), it will return + * `UNKNOWN_OFFSET`. + */ + private def getEarliestAvailableOffsetBetween(offset: Long, untilOffset: Long): Long = { + val (earliestOffset, latestOffset) = getAvailableOffsetRange() + logWarning(s"Some data may be lost. Recovering from the earliest offset: $earliestOffset") + if (offset >= latestOffset || earliestOffset >= untilOffset) { + // [offset, untilOffset) and [earliestOffset, latestOffset) have no overlap, + // either + // -------------------------------------------------------- + // ^ ^ ^ ^ + // | | | | + // earliestOffset latestOffset offset untilOffset + // + // or + // -------------------------------------------------------- + // ^ ^ ^ ^ + // | | | | + // offset untilOffset earliestOffset latestOffset + val warningMessage = + s""" + |The current available offset range is [$earliestOffset, $latestOffset). 
+ | Offset ${offset} is out of range, and records in [$offset, $untilOffset) will be + | skipped ${additionalMessage(failOnDataLoss = false)} + """.stripMargin + logWarning(warningMessage) + UNKNOWN_OFFSET + } else if (offset >= earliestOffset) { + // ----------------------------------------------------------------------------- + // ^ ^ ^ ^ + // | | | | + // earliestOffset offset min(untilOffset,latestOffset) max(untilOffset, latestOffset) + // + // This will happen when a topic is deleted and recreated, and new data are pushed very fast, + // then we will see `offset` disappears first then appears again. Although the parameters + // are same, the state in Kafka cluster is changed, so the outer loop won't be endless. + logWarning(s"Found a disappeared offset $offset. " + + s"Some data may be lost ${additionalMessage(failOnDataLoss = false)}") + offset + } else { + // ------------------------------------------------------------------------------ + // ^ ^ ^ ^ + // | | | | + // offset earliestOffset min(untilOffset,latestOffset) max(untilOffset, latestOffset) + val warningMessage = + s""" + |The current available offset range is [$earliestOffset, $latestOffset). + | Offset ${offset} is out of range, and records in [$offset, $earliestOffset) will be + | skipped ${additionalMessage(failOnDataLoss = false)} + """.stripMargin + logWarning(warningMessage) + earliestOffset + } + } - if (record.offset != offset) { - logInfo(s"Buffer miss for $groupId $topicPartition $offset") + /** + * Get the record at `offset`. + * + * @throws OffsetOutOfRangeException if `offset` is out of range + * @throws TimeoutException if cannot fetch the record in `pollTimeoutMs` milliseconds. + */ + private def fetchData( + offset: Long, + pollTimeoutMs: Long): ConsumerRecord[Array[Byte], Array[Byte]] = { + if (offset != nextOffsetInFetchedData || !fetchedData.hasNext()) { + // This is the first fetch, or the last pre-fetched data has been drained. + // Seek to the offset because we may call seekToBeginning or seekToEnd before this. seek(offset) poll(pollTimeoutMs) - assert(fetchedData.hasNext(), - s"Failed to get records for $groupId $topicPartition $offset " + - s"after polling for $pollTimeoutMs") - record = fetchedData.next() + } + + if (!fetchedData.hasNext()) { + // We cannot fetch anything after `poll`. Two possible cases: + // - `offset` is out of range so that Kafka returns nothing. Just throw + // `OffsetOutOfRangeException` to let the caller handle it. + // - Cannot fetch any data before timeout. TimeoutException will be thrown. + val (earliestOffset, latestOffset) = getAvailableOffsetRange() + if (offset < earliestOffset || offset >= latestOffset) { + throw new OffsetOutOfRangeException( + Map(topicPartition -> java.lang.Long.valueOf(offset)).asJava) + } else { + throw new TimeoutException( + s"Cannot fetch record for offset $offset in $pollTimeoutMs milliseconds") + } + } else { + val record = fetchedData.next() + nextOffsetInFetchedData = record.offset + 1 + // `seek` is always called before "poll". So "record.offset" must be same as "offset". 
assert(record.offset == offset, - s"Got wrong record for $groupId $topicPartition even after seeking to offset $offset") + s"The fetched data has a different offset: expected $offset but was ${record.offset}") + record } + } + + /** Create a new consumer and reset cached states */ + private def resetConsumer(): Unit = { + consumer.close() + consumer = createConsumer + resetFetchedData() + } - nextOffsetInFetchedData = offset + 1 - record + /** Reset the internal pre-fetched data. */ + private def resetFetchedData(): Unit = { + nextOffsetInFetchedData = UNKNOWN_OFFSET + fetchedData = ju.Collections.emptyIterator[ConsumerRecord[Array[Byte], Array[Byte]]] + } + + /** + * Return an addition message including useful message and instruction. + */ + private def additionalMessage(failOnDataLoss: Boolean): String = { + if (failOnDataLoss) { + s"(GroupId: $groupId, TopicPartition: $topicPartition). " + + s"$INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE" + } else { + s"(GroupId: $groupId, TopicPartition: $topicPartition). " + + s"$INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE" + } + } + + /** + * Throw an exception or log a warning as per `failOnDataLoss`. + */ + private def reportDataLoss( + failOnDataLoss: Boolean, + message: String, + cause: Throwable = null): Unit = { + val finalMessage = s"$message ${additionalMessage(failOnDataLoss)}" + if (failOnDataLoss) { + if (cause != null) { + throw new IllegalStateException(finalMessage) + } else { + throw new IllegalStateException(finalMessage, cause) + } + } else { + if (cause != null) { + logWarning(finalMessage) + } else { + logWarning(finalMessage, cause) + } + } } private def close(): Unit = consumer.close() @@ -96,10 +259,24 @@ private[kafka010] case class CachedKafkaConsumer private( logDebug(s"Polled $groupId ${p.partitions()} ${r.size}") fetchedData = r.iterator } + + /** + * Return the available offset range of the current partition. It's a pair of the earliest offset + * and the latest offset. + */ + private def getAvailableOffsetRange(): (Long, Long) = { + consumer.seekToBeginning(Set(topicPartition).asJava) + val earliestOffset = consumer.position(topicPartition) + consumer.seekToEnd(Set(topicPartition).asJava) + val latestOffset = consumer.position(topicPartition) + (earliestOffset, latestOffset) + } } private[kafka010] object CachedKafkaConsumer extends Logging { + private val UNKNOWN_OFFSET = -2L + private case class CacheKey(groupId: String, topicPartition: TopicPartition) private lazy val cache = { @@ -140,7 +317,10 @@ private[kafka010] object CachedKafkaConsumer extends Logging { // If this is reattempt at running the task, then invalidate cache and start with // a new consumer if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) { - cache.remove(key) + val removedConsumer = cache.remove(key) + if (removedConsumer != null) { + removedConsumer.close() + } new CachedKafkaConsumer(topicPartition, kafkaParams) } else { if (!cache.containsKey(key)) { diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 341081a338c0e..1d0d402b82a35 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -281,7 +281,7 @@ private[kafka010] case class KafkaSource( // Create an RDD that reads from Kafka and get the (key, value) pair as byte arrays. 
val rdd = new KafkaSourceRDD( - sc, executorKafkaParams, offsetRanges, pollTimeoutMs).map { cr => + sc, executorKafkaParams, offsetRanges, pollTimeoutMs, failOnDataLoss).map { cr => Row(cr.key, cr.value, cr.topic, cr.partition, cr.offset, cr.timestamp, cr.timestampType.id) } @@ -463,10 +463,9 @@ private[kafka010] case class KafkaSource( */ private def reportDataLoss(message: String): Unit = { if (failOnDataLoss) { - throw new IllegalStateException(message + - ". Set the source option 'failOnDataLoss' to 'false' if you want to ignore these checks.") + throw new IllegalStateException(message + s". $INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE") } else { - logWarning(message) + logWarning(message + s". $INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE") } } } @@ -475,6 +474,22 @@ private[kafka010] case class KafkaSource( /** Companion object for the [[KafkaSource]]. */ private[kafka010] object KafkaSource { + val INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE = + """ + |Some data may have been lost because they are not available in Kafka any more; either the + | data was aged out by Kafka or the topic may have been deleted before all the data in the + | topic was processed. If you want your streaming query to fail on such cases, set the source + | option "failOnDataLoss" to "true". + """.stripMargin + + val INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE = + """ + |Some data may have been lost because they are not available in Kafka any more; either the + | data was aged out by Kafka or the topic may have been deleted before all the data in the + | topic was processed. If you don't want your streaming query to fail on such cases, set the + | source option "failOnDataLoss" to "false". + """.stripMargin + def kafkaSchema: StructType = StructType(Seq( StructField("key", BinaryType), StructField("value", BinaryType), diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala index 802dd040aed93..244cd2c225bdd 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala @@ -28,6 +28,7 @@ import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.NextIterator /** Offset range that one partition of the KafkaSourceRDD has to read */ @@ -61,7 +62,8 @@ private[kafka010] class KafkaSourceRDD( sc: SparkContext, executorKafkaParams: ju.Map[String, Object], offsetRanges: Seq[KafkaSourceRDDOffsetRange], - pollTimeoutMs: Long) + pollTimeoutMs: Long, + failOnDataLoss: Boolean) extends RDD[ConsumerRecord[Array[Byte], Array[Byte]]](sc, Nil) { override def persist(newLevel: StorageLevel): this.type = { @@ -130,23 +132,31 @@ private[kafka010] class KafkaSourceRDD( logInfo(s"Beginning offset ${range.fromOffset} is the same as ending offset " + s"skipping ${range.topic} ${range.partition}") Iterator.empty - } else { - - val consumer = CachedKafkaConsumer.getOrCreate( - range.topic, range.partition, executorKafkaParams) - var requestOffset = range.fromOffset - - logDebug(s"Creating iterator for $range") - - new Iterator[ConsumerRecord[Array[Byte], Array[Byte]]]() { - override def hasNext(): Boolean = requestOffset < range.untilOffset - override def next(): ConsumerRecord[Array[Byte], Array[Byte]] = { - 
assert(hasNext(), "Can't call next() once untilOffset has been reached") - val r = consumer.get(requestOffset, pollTimeoutMs) - requestOffset += 1 - r + new NextIterator[ConsumerRecord[Array[Byte], Array[Byte]]]() { + val consumer = CachedKafkaConsumer.getOrCreate( + range.topic, range.partition, executorKafkaParams) + var requestOffset = range.fromOffset + + override def getNext(): ConsumerRecord[Array[Byte], Array[Byte]] = { + if (requestOffset >= range.untilOffset) { + // Processed all offsets in this partition. + finished = true + null + } else { + val r = consumer.get(requestOffset, range.untilOffset, pollTimeoutMs, failOnDataLoss) + if (r == null) { + // Losing some data. Skip the rest offsets in this partition. + finished = true + null + } else { + requestOffset = r.offset + 1 + r + } + } } + + override protected def close(): Unit = {} } } } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala index 89e713f92df46..cd52fd93d10a4 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala @@ -17,8 +17,12 @@ package org.apache.spark.sql.kafka010 +import java.util.Properties +import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger +import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.util.Random import org.apache.kafka.clients.producer.RecordMetadata @@ -27,8 +31,9 @@ import org.scalatest.concurrent.Eventually._ import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ +import org.apache.spark.sql.ForeachWriter import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.streaming.{ ProcessingTime, StreamTest } +import org.apache.spark.sql.streaming.{ProcessingTime, StreamTest} import org.apache.spark.sql.test.SharedSQLContext abstract class KafkaSourceTest extends StreamTest with SharedSQLContext { @@ -202,7 +207,7 @@ class KafkaSourceSuite extends KafkaSourceTest { test("cannot stop Kafka stream") { val topic = newTopic() - testUtils.createTopic(newTopic(), partitions = 5) + testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, (101 to 105).map { _.toString }.toArray) val reader = spark @@ -223,52 +228,85 @@ class KafkaSourceSuite extends KafkaSourceTest { ) } - test("assign from latest offsets") { - val topic = newTopic() - testFromLatestOffsets(topic, false, "assign" -> assignString(topic, 0 to 4)) - } + for (failOnDataLoss <- Seq(true, false)) { + test(s"assign from latest offsets (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromLatestOffsets( + topic, + addPartitions = false, + failOnDataLoss = failOnDataLoss, + "assign" -> assignString(topic, 0 to 4)) + } - test("assign from earliest offsets") { - val topic = newTopic() - testFromEarliestOffsets(topic, false, "assign" -> assignString(topic, 0 to 4)) - } + test(s"assign from earliest offsets (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromEarliestOffsets( + topic, + addPartitions = false, + failOnDataLoss = failOnDataLoss, + "assign" -> assignString(topic, 0 to 4)) + } - test("assign from specific offsets") { - val topic = newTopic() - testFromSpecificOffsets(topic, "assign" -> assignString(topic, 0 to 4)) - } + test(s"assign from specific 
offsets (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromSpecificOffsets( + topic, + failOnDataLoss = failOnDataLoss, + "assign" -> assignString(topic, 0 to 4), + "failOnDataLoss" -> failOnDataLoss.toString) + } - test("subscribing topic by name from latest offsets") { - val topic = newTopic() - testFromLatestOffsets(topic, true, "subscribe" -> topic) - } + test(s"subscribing topic by name from latest offsets (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromLatestOffsets( + topic, + addPartitions = true, + failOnDataLoss = failOnDataLoss, + "subscribe" -> topic) + } - test("subscribing topic by name from earliest offsets") { - val topic = newTopic() - testFromEarliestOffsets(topic, true, "subscribe" -> topic) - } + test(s"subscribing topic by name from earliest offsets (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromEarliestOffsets( + topic, + addPartitions = true, + failOnDataLoss = failOnDataLoss, + "subscribe" -> topic) + } - test("subscribing topic by name from specific offsets") { - val topic = newTopic() - testFromSpecificOffsets(topic, "subscribe" -> topic) - } + test(s"subscribing topic by name from specific offsets (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromSpecificOffsets(topic, failOnDataLoss = failOnDataLoss, "subscribe" -> topic) + } - test("subscribing topic by pattern from latest offsets") { - val topicPrefix = newTopic() - val topic = topicPrefix + "-suffix" - testFromLatestOffsets(topic, true, "subscribePattern" -> s"$topicPrefix-.*") - } + test(s"subscribing topic by pattern from latest offsets (failOnDataLoss: $failOnDataLoss)") { + val topicPrefix = newTopic() + val topic = topicPrefix + "-suffix" + testFromLatestOffsets( + topic, + addPartitions = true, + failOnDataLoss = failOnDataLoss, + "subscribePattern" -> s"$topicPrefix-.*") + } - test("subscribing topic by pattern from earliest offsets") { - val topicPrefix = newTopic() - val topic = topicPrefix + "-suffix" - testFromEarliestOffsets(topic, true, "subscribePattern" -> s"$topicPrefix-.*") - } + test(s"subscribing topic by pattern from earliest offsets (failOnDataLoss: $failOnDataLoss)") { + val topicPrefix = newTopic() + val topic = topicPrefix + "-suffix" + testFromEarliestOffsets( + topic, + addPartitions = true, + failOnDataLoss = failOnDataLoss, + "subscribePattern" -> s"$topicPrefix-.*") + } - test("subscribing topic by pattern from specific offsets") { - val topicPrefix = newTopic() - val topic = topicPrefix + "-suffix" - testFromSpecificOffsets(topic, "subscribePattern" -> s"$topicPrefix-.*") + test(s"subscribing topic by pattern from specific offsets (failOnDataLoss: $failOnDataLoss)") { + val topicPrefix = newTopic() + val topic = topicPrefix + "-suffix" + testFromSpecificOffsets( + topic, + failOnDataLoss = failOnDataLoss, + "subscribePattern" -> s"$topicPrefix-.*") + } } test("subscribing topic by pattern with topic deletions") { @@ -413,13 +451,59 @@ class KafkaSourceSuite extends KafkaSourceTest { ) } + test("delete a topic when a Spark job is running") { + KafkaSourceSuite.collectedData.clear() + + val topic = newTopic() + testUtils.createTopic(topic, partitions = 1) + testUtils.sendMessages(topic, (1 to 10).map(_.toString).toArray) + + val reader = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("subscribe", topic) + // If a topic is deleted and we try to poll data starting from offset 0, + // 
the Kafka consumer will just block until timeout and return an empty result. + // So set the timeout to 1 second to make this test fast. + .option("kafkaConsumer.pollTimeoutMs", "1000") + .option("startingOffsets", "earliest") + .option("failOnDataLoss", "false") + val kafka = reader.load() + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + KafkaSourceSuite.globalTestUtils = testUtils + // The following ForeachWriter will delete the topic before fetching data from Kafka + // in executors. + val query = kafka.map(kv => kv._2.toInt).writeStream.foreach(new ForeachWriter[Int] { + override def open(partitionId: Long, version: Long): Boolean = { + KafkaSourceSuite.globalTestUtils.deleteTopic(topic) + true + } + + override def process(value: Int): Unit = { + KafkaSourceSuite.collectedData.add(value) + } + + override def close(errorOrNull: Throwable): Unit = {} + }).start() + query.processAllAvailable() + query.stop() + // `failOnDataLoss` is `false`, we should not fail the query + assert(query.exception.isEmpty) + } + private def newTopic(): String = s"topic-${topicId.getAndIncrement()}" private def assignString(topic: String, partitions: Iterable[Int]): String = { JsonUtils.partitions(partitions.map(p => new TopicPartition(topic, p))) } - private def testFromSpecificOffsets(topic: String, options: (String, String)*): Unit = { + private def testFromSpecificOffsets( + topic: String, + failOnDataLoss: Boolean, + options: (String, String)*): Unit = { val partitionOffsets = Map( new TopicPartition(topic, 0) -> -2L, new TopicPartition(topic, 1) -> -1L, @@ -448,6 +532,7 @@ class KafkaSourceSuite extends KafkaSourceTest { .option("startingOffsets", startingOffsets) .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") + .option("failOnDataLoss", failOnDataLoss.toString) options.foreach { case (k, v) => reader.option(k, v) } val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") @@ -469,6 +554,7 @@ class KafkaSourceSuite extends KafkaSourceTest { private def testFromLatestOffsets( topic: String, addPartitions: Boolean, + failOnDataLoss: Boolean, options: (String, String)*): Unit = { testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, Array("-1")) @@ -480,6 +566,7 @@ class KafkaSourceSuite extends KafkaSourceTest { .option("startingOffsets", s"latest") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") + .option("failOnDataLoss", failOnDataLoss.toString) options.foreach { case (k, v) => reader.option(k, v) } val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") @@ -513,6 +600,7 @@ class KafkaSourceSuite extends KafkaSourceTest { private def testFromEarliestOffsets( topic: String, addPartitions: Boolean, + failOnDataLoss: Boolean, options: (String, String)*): Unit = { testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, (1 to 3).map { _.toString }.toArray) @@ -524,6 +612,7 @@ class KafkaSourceSuite extends KafkaSourceTest { .option("startingOffsets", s"earliest") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") + .option("failOnDataLoss", failOnDataLoss.toString) options.foreach { case (k, v) => reader.option(k, v) } val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") @@ -552,6 +641,11 @@ class KafkaSourceSuite extends KafkaSourceTest { } } +object KafkaSourceSuite { + @volatile 
var globalTestUtils: KafkaTestUtils = _ + val collectedData = new ConcurrentLinkedQueue[Any]() +} + class KafkaSourceStressSuite extends KafkaSourceTest { @@ -615,7 +709,7 @@ class KafkaSourceStressSuite extends KafkaSourceTest { } }) case 2 => // Add new partitions - AddKafkaData(topics.toSet, d: _*)(message = "Add partitiosn", + AddKafkaData(topics.toSet, d: _*)(message = "Add partition", topicAction = (topic, partition) => { testUtils.addPartitions(topic, partition.get + nextInt(1, 6)) }) @@ -626,3 +720,122 @@ class KafkaSourceStressSuite extends KafkaSourceTest { iterations = 50) } } + +class KafkaSourceStressForDontFailOnDataLossSuite extends StreamTest with SharedSQLContext { + + import testImplicits._ + + private var testUtils: KafkaTestUtils = _ + + private val topicId = new AtomicInteger(0) + + private def newTopic(): String = s"failOnDataLoss-${topicId.getAndIncrement()}" + + override def beforeAll(): Unit = { + super.beforeAll() + testUtils = new KafkaTestUtils { + override def brokerConfiguration: Properties = { + val props = super.brokerConfiguration + // Try to make Kafka clean up messages as fast as possible. However, there is a hard-code + // 30 seconds delay (kafka.log.LogManager.InitialTaskDelayMs) so this test should run at + // least 30 seconds. + props.put("log.cleaner.backoff.ms", "100") + props.put("log.segment.bytes", "40") + props.put("log.retention.bytes", "40") + props.put("log.retention.check.interval.ms", "100") + props.put("delete.retention.ms", "10") + props.put("log.flush.scheduler.interval.ms", "10") + props + } + } + testUtils.setup() + } + + override def afterAll(): Unit = { + if (testUtils != null) { + testUtils.teardown() + testUtils = null + super.afterAll() + } + } + + test("stress test for failOnDataLoss=false") { + val reader = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("subscribePattern", "failOnDataLoss.*") + .option("startingOffsets", "earliest") + .option("failOnDataLoss", "false") + val kafka = reader.load() + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + val query = kafka.map(kv => kv._2.toInt).writeStream.foreach(new ForeachWriter[Int] { + + override def open(partitionId: Long, version: Long): Boolean = { + true + } + + override def process(value: Int): Unit = { + // Slow down the processing speed so that messages may be aged out. + Thread.sleep(Random.nextInt(500)) + } + + override def close(errorOrNull: Throwable): Unit = { + } + }).start() + + val testTime = 1.minutes + val startTime = System.currentTimeMillis() + // Track the current existing topics + val topics = mutable.ArrayBuffer[String]() + // Track topics that have been deleted + val deletedTopics = mutable.Set[String]() + while (System.currentTimeMillis() - testTime.toMillis < startTime) { + Random.nextInt(10) match { + case 0 => // Create a new topic + val topic = newTopic() + topics += topic + // As pushing messages into Kafka updates Zookeeper asynchronously, there is a small + // chance that a topic will be recreated after deletion due to the asynchronous update. + // Hence, always overwrite to handle this race condition. 
+ testUtils.createTopic(topic, partitions = 1, overwrite = true) + logInfo(s"Create topic $topic") + case 1 if topics.nonEmpty => // Delete an existing topic + val topic = topics.remove(Random.nextInt(topics.size)) + testUtils.deleteTopic(topic) + logInfo(s"Delete topic $topic") + deletedTopics += topic + case 2 if deletedTopics.nonEmpty => // Recreate a topic that was deleted. + val topic = deletedTopics.toSeq(Random.nextInt(deletedTopics.size)) + deletedTopics -= topic + topics += topic + // As pushing messages into Kafka updates Zookeeper asynchronously, there is a small + // chance that a topic will be recreated after deletion due to the asynchronous update. + // Hence, always overwrite to handle this race condition. + testUtils.createTopic(topic, partitions = 1, overwrite = true) + logInfo(s"Create topic $topic") + case 3 => + Thread.sleep(1000) + case _ => // Push random messages + for (topic <- topics) { + val size = Random.nextInt(10) + for (_ <- 0 until size) { + testUtils.sendMessages(topic, Array(Random.nextInt(10).toString)) + } + } + } + // `failOnDataLoss` is `false`, we should not fail the query + if (query.exception.nonEmpty) { + throw query.exception.get + } + } + + query.stop() + // `failOnDataLoss` is `false`, we should not fail the query + if (query.exception.nonEmpty) { + throw query.exception.get + } + } +} diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index 9b24ccdd560e8..f43917e151c57 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -155,8 +155,16 @@ class KafkaTestUtils extends Logging { } /** Create a Kafka topic and wait until it is propagated to the whole cluster */ - def createTopic(topic: String, partitions: Int): Unit = { - AdminUtils.createTopic(zkUtils, topic, partitions, 1) + def createTopic(topic: String, partitions: Int, overwrite: Boolean = false): Unit = { + var created = false + while (!created) { + try { + AdminUtils.createTopic(zkUtils, topic, partitions, 1) + created = true + } catch { + case e: kafka.common.TopicExistsException if overwrite => deleteTopic(topic) + } + } // wait until metadata is propagated (0 until partitions).foreach { p => waitUntilMetadataIsPropagated(topic, p) @@ -244,7 +252,7 @@ class KafkaTestUtils extends Logging { offsets } - private def brokerConfiguration: Properties = { + protected def brokerConfiguration: Properties = { val props = new Properties() props.put("broker.id", "0") props.put("host.name", "localhost") @@ -302,9 +310,11 @@ class KafkaTestUtils extends Logging { } checkpoints.forall(checkpointsPerLogDir => !checkpointsPerLogDir.contains(tp)) }) - deletePath && topicPath && replicaManager && logManager && cleaner + // ensure the topic is gone + val deleted = !zkUtils.getAllTopics().contains(topic) + deletePath && topicPath && replicaManager && logManager && cleaner && deleted } - eventually(timeout(10.seconds)) { + eventually(timeout(60.seconds)) { assert(isDeleted, s"$topic not deleted after timeout") } } From 64b9de9c079672eff49dc38e55749d9a26c743a6 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Tue, 22 Nov 2016 15:10:49 -0800 Subject: [PATCH 0177/1204] [SPARK-16803][SQL] SaveAsTable does not work when target table is a Hive serde table ### What changes were proposed in this pull request? 
In Spark 2.0, `SaveAsTable` does not work when the target table is a Hive serde table, but Spark 1.6 works. **Spark 1.6** ``` Scala scala> sql("create table sample.sample stored as SEQUENCEFILE as select 1 as key, 'abc' as value") res2: org.apache.spark.sql.DataFrame = [] scala> val df = sql("select key, value as value from sample.sample") df: org.apache.spark.sql.DataFrame = [key: int, value: string] scala> df.write.mode("append").saveAsTable("sample.sample") scala> sql("select * from sample.sample").show() +---+-----+ |key|value| +---+-----+ | 1| abc| | 1| abc| +---+-----+ ``` **Spark 2.0** ``` Scala scala> df.write.mode("append").saveAsTable("sample.sample") org.apache.spark.sql.AnalysisException: Saving data in MetastoreRelation sample, sample is not supported.; ``` So far, we do not plan to support it in Spark 2.1 due to the risk. Spark 1.6 works because it internally uses insertInto. But, if we change it back it will break the semantic of saveAsTable (this method uses by-name resolution instead of using by-position resolution used by insertInto). More extra changes are needed to support `hive` as a `format` in DataFrameWriter. Instead, users should use insertInto API. This PR corrects the error messages. Users can understand how to bypass it before we support it in a separate PR. ### How was this patch tested? Test cases are added Author: gatorsmile Closes #15926 from gatorsmile/saveAsTableFix5. (cherry picked from commit 9c42d4a76ca8046fcca2e20067f2aa461977e65a) Signed-off-by: gatorsmile --- .../command/createDataSourceTables.scala | 4 ++++ .../sql/hive/MetastoreDataSourcesSuite.scala | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 7e16e43f2bb0e..add732c1afc16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -175,6 +175,10 @@ case class CreateDataSourceTableAsSelectCommand( existingSchema = Some(l.schema) case s: SimpleCatalogRelation if DDLUtils.isDatasourceTable(s.metadata) => existingSchema = Some(s.metadata.schema) + case c: CatalogRelation if c.catalogTable.provider == Some(DDLUtils.HIVE_PROVIDER) => + throw new AnalysisException("Saving data in the Hive serde table " + + s"${c.catalogTable.identifier} is not supported yet. 
Please use the " + + "insertInto() API as an alternative..") case o => throw new AnalysisException(s"Saving data in ${o.toString} is not supported.") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 4ab1a54edc46d..c7cc75fbc8a07 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -413,6 +413,26 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } } + test("saveAsTable(CTAS) using append and insertInto when the target table is Hive serde") { + val tableName = "tab1" + withTable(tableName) { + sql(s"CREATE TABLE $tableName STORED AS SEQUENCEFILE AS SELECT 1 AS key, 'abc' AS value") + + val df = sql(s"SELECT key, value FROM $tableName") + val e = intercept[AnalysisException] { + df.write.mode(SaveMode.Append).saveAsTable(tableName) + }.getMessage + assert(e.contains("Saving data in the Hive serde table `default`.`tab1` is not supported " + + "yet. Please use the insertInto() API as an alternative.")) + + df.write.insertInto(tableName) + checkAnswer( + sql(s"SELECT * FROM $tableName"), + Row(1, "abc") :: Row(1, "abc") :: Nil + ) + } + } + test("SPARK-5839 HiveMetastoreCatalog does not recognize table aliases of data source tables.") { withTable("savedJsonTable") { // Save the df as a managed table (by not specifying the path). From 4b96ffb13a5171ef422aed955fd6b50354ae4253 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Tue, 22 Nov 2016 15:57:07 -0800 Subject: [PATCH 0178/1204] [SPARK-18533] Raise correct error upon specification of schema for datasource tables created using CTAS ## What changes were proposed in this pull request? Fixes the inconsistency of error raised between data source and hive serde tables when schema is specified in CTAS scenario. In the process the grammar for create table (datasource) is simplified. **before:** ``` SQL spark-sql> create table t2 (c1 int, c2 int) using parquet as select * from t1; Error in query: mismatched input 'as' expecting {, '.', 'OPTIONS', 'CLUSTERED', 'PARTITIONED'}(line 1, pos 64) == SQL == create table t2 (c1 int, c2 int) using parquet as select * from t1 ----------------------------------------------------------------^^^ ``` **After:** ```SQL spark-sql> create table t2 (c1 int, c2 int) using parquet as select * from t1 > ; Error in query: Operation not allowed: Schema may not be specified in a Create Table As Select (CTAS) statement(line 1, pos 0) == SQL == create table t2 (c1 int, c2 int) using parquet as select * from t1 ^^^ ``` ## How was this patch tested? Added a new test in CreateTableAsSelectSuite Author: Dilip Biswal Closes #15968 from dilipbiswal/ctas. 
(cherry picked from commit 39a1d30636857715247c82d551b200e1c331ad69) Signed-off-by: gatorsmile --- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +---- .../spark/sql/execution/SparkSqlParser.scala | 24 +++++++++++++++++-- .../sources/CreateTableAsSelectSuite.scala | 9 +++++++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 0aa2a97407c53..df85c70c6cdea 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -71,11 +71,7 @@ statement | createTableHeader ('(' colTypeList ')')? tableProvider (OPTIONS tablePropertyList)? (PARTITIONED BY partitionColumnNames=identifierList)? - bucketSpec? #createTableUsing - | createTableHeader tableProvider - (OPTIONS tablePropertyList)? - (PARTITIONED BY partitionColumnNames=identifierList)? - bucketSpec? AS? query #createTableUsing + bucketSpec? (AS? query)? #createTableUsing | createTableHeader ('(' columns=colTypeList ')')? (COMMENT STRING)? (PARTITIONED BY '(' partitionColumns=colTypeList ')')? diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 47610453ac23a..5f89a229d6242 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -322,7 +322,20 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { } /** - * Create a [[CreateTable]] logical plan. + * Create a data source table, returning a [[CreateTable]] logical plan. + * + * Expected format: + * {{{ + * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name + * USING table_provider + * [OPTIONS table_property_list] + * [PARTITIONED BY (col_name, col_name, ...)] + * [CLUSTERED BY (col_name, col_name, ...) + * [SORTED BY (col_name [ASC|DESC], ...)] + * INTO num_buckets BUCKETS + * ] + * [AS select_statement]; + * }}} */ override def visitCreateTableUsing(ctx: CreateTableUsingContext): LogicalPlan = withOrigin(ctx) { val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) @@ -371,6 +384,12 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { operationNotAllowed("CREATE TEMPORARY TABLE ... USING ... AS query", ctx) } + // Don't allow explicit specification of schema for CTAS + if (schema.nonEmpty) { + operationNotAllowed( + "Schema may not be specified in a Create Table As Select (CTAS) statement", + ctx) + } CreateTable(tableDesc, mode, Some(query)) } else { if (temp) { @@ -1052,7 +1071,8 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder { "CTAS statement." operationNotAllowed(errorMessage, ctx) } - // Just use whatever is projected in the select statement as our schema + + // Don't allow explicit specification of schema for CTAS. 
if (schema.nonEmpty) { operationNotAllowed( "Schema may not be specified in a Create Table As Select (CTAS) statement", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 5cc9467395adc..61939fe5ef5b5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -249,4 +249,13 @@ class CreateTableAsSelectSuite } } } + + test("specifying the column list for CTAS") { + withTable("t") { + val e = intercept[ParseException] { + sql("CREATE TABLE t (a int, b int) USING parquet AS SELECT 1, 2") + }.getMessage + assert(e.contains("Schema may not be specified in a Create Table As Select (CTAS)")) + } + } } From 3be2d1e0b52bf15ac28a9f96b03ae048e680b035 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Tue, 22 Nov 2016 16:49:15 -0800 Subject: [PATCH 0179/1204] [SPARK-18530][SS][KAFKA] Change Kafka timestamp column type to TimestampType ## What changes were proposed in this pull request? Changed Kafka timestamp column type to TimestampType. ## How was this patch tested? `test("Kafka column types")`. Author: Shixiong Zhu Closes #15969 from zsxwing/SPARK-18530. (cherry picked from commit d0212eb0f22473ee5482fe98dafc24e16ffcfc63) Signed-off-by: Shixiong Zhu --- .../spark/sql/kafka010/KafkaSource.scala | 16 +++- .../spark/sql/kafka010/KafkaSourceSuite.scala | 81 ++++++++++++++++++- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 1d0d402b82a35..d9ab4bb4f873d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -32,9 +32,12 @@ import org.apache.spark.SparkContext import org.apache.spark.internal.Logging import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.kafka010.KafkaSource._ import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.UninterruptibleThread /** @@ -282,7 +285,14 @@ private[kafka010] case class KafkaSource( // Create an RDD that reads from Kafka and get the (key, value) pair as byte arrays. val rdd = new KafkaSourceRDD( sc, executorKafkaParams, offsetRanges, pollTimeoutMs, failOnDataLoss).map { cr => - Row(cr.key, cr.value, cr.topic, cr.partition, cr.offset, cr.timestamp, cr.timestampType.id) + InternalRow( + cr.key, + cr.value, + UTF8String.fromString(cr.topic), + cr.partition, + cr.offset, + DateTimeUtils.fromJavaTimestamp(new java.sql.Timestamp(cr.timestamp)), + cr.timestampType.id) } logInfo("GetBatch generating RDD of offset range: " + @@ -293,7 +303,7 @@ private[kafka010] case class KafkaSource( currentPartitionOffsets = Some(untilPartitionOffsets) } - sqlContext.createDataFrame(rdd, schema) + sqlContext.internalCreateDataFrame(rdd, schema) } /** Stop this source and free any resources it has allocated. 
*/ @@ -496,7 +506,7 @@ private[kafka010] object KafkaSource { StructField("topic", StringType), StructField("partition", IntegerType), StructField("offset", LongType), - StructField("timestamp", LongType), + StructField("timestamp", TimestampType), StructField("timestampType", IntegerType) )) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala index cd52fd93d10a4..f9f62581a3066 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceSuite.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.kafka010 +import java.nio.charset.StandardCharsets.UTF_8 import java.util.Properties import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger -import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.Random @@ -33,6 +33,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.sql.ForeachWriter import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.functions.{count, window} import org.apache.spark.sql.streaming.{ProcessingTime, StreamTest} import org.apache.spark.sql.test.SharedSQLContext @@ -551,6 +552,84 @@ class KafkaSourceSuite extends KafkaSourceTest { ) } + test("Kafka column types") { + val now = System.currentTimeMillis() + val topic = newTopic() + testUtils.createTopic(newTopic(), partitions = 1) + testUtils.sendMessages(topic, Array(1).map(_.toString)) + + val kafka = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("startingOffsets", s"earliest") + .option("subscribe", topic) + .load() + + val query = kafka + .writeStream + .format("memory") + .outputMode("append") + .queryName("kafkaColumnTypes") + .start() + query.processAllAvailable() + val rows = spark.table("kafkaColumnTypes").collect() + assert(rows.length === 1, s"Unexpected results: ${rows.toList}") + val row = rows(0) + assert(row.getAs[Array[Byte]]("key") === null, s"Unexpected results: $row") + assert(row.getAs[Array[Byte]]("value") === "1".getBytes(UTF_8), s"Unexpected results: $row") + assert(row.getAs[String]("topic") === topic, s"Unexpected results: $row") + assert(row.getAs[Int]("partition") === 0, s"Unexpected results: $row") + assert(row.getAs[Long]("offset") === 0L, s"Unexpected results: $row") + // We cannot check the exact timestamp as it's the time that messages were inserted by the + // producer. So here we just use a low bound to make sure the internal conversion works. 
+ assert(row.getAs[java.sql.Timestamp]("timestamp").getTime >= now, s"Unexpected results: $row") + assert(row.getAs[Int]("timestampType") === 0, s"Unexpected results: $row") + query.stop() + } + + test("KafkaSource with watermark") { + val now = System.currentTimeMillis() + val topic = newTopic() + testUtils.createTopic(newTopic(), partitions = 1) + testUtils.sendMessages(topic, Array(1).map(_.toString)) + + val kafka = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("startingOffsets", s"earliest") + .option("subscribe", topic) + .load() + + val windowedAggregation = kafka + .withWatermark("timestamp", "10 seconds") + .groupBy(window($"timestamp", "5 seconds") as 'window) + .agg(count("*") as 'count) + .select($"window".getField("start") as 'window, $"count") + + val query = windowedAggregation + .writeStream + .format("memory") + .outputMode("complete") + .queryName("kafkaWatermark") + .start() + query.processAllAvailable() + val rows = spark.table("kafkaWatermark").collect() + assert(rows.length === 1, s"Unexpected results: ${rows.toList}") + val row = rows(0) + // We cannot check the exact window start time as it depands on the time that messages were + // inserted by the producer. So here we just use a low bound to make sure the internal + // conversion works. + assert( + row.getAs[java.sql.Timestamp]("window").getTime >= now - 5 * 1000, + s"Unexpected results: $row") + assert(row.getAs[Int]("count") === 1, s"Unexpected results: $row") + query.stop() + } + private def testFromLatestOffsets( topic: String, addPartitions: Boolean, From fc5fee83e363bc6df22459a9b1ba2ba11bfdfa20 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 22 Nov 2016 19:17:48 -0800 Subject: [PATCH 0180/1204] [SPARK-18501][ML][SPARKR] Fix spark.glm errors when fitting on collinear data ## What changes were proposed in this pull request? * Fix SparkR ```spark.glm``` errors when fitting on collinear data, since ```standard error of coefficients, t value and p value``` are not available in this condition. * Scala/Python GLM summary should throw exception if users get ```standard error of coefficients, t value and p value``` but the underlying WLS was solved by local "l-bfgs". ## How was this patch tested? Add unit tests. Author: Yanbo Liang Closes #15930 from yanboliang/spark-18501. (cherry picked from commit 982b82e32e0fc7d30c5d557944a79eb3e6d2da59) Signed-off-by: Yanbo Liang --- R/pkg/R/mllib.R | 21 ++++++-- R/pkg/inst/tests/testthat/test_mllib.R | 9 ++++ .../GeneralizedLinearRegressionWrapper.scala | 54 +++++++++++-------- .../GeneralizedLinearRegression.scala | 46 +++++++++++++--- .../GeneralizedLinearRegressionSuite.scala | 21 ++++++++ 5 files changed, 115 insertions(+), 36 deletions(-) diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 265e64e7466fa..02bc6456de4d0 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -278,8 +278,10 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat #' @param object a fitted generalized linear model. #' @return \code{summary} returns a summary object of the fitted model, a list of components -#' including at least the coefficients, null/residual deviance, null/residual degrees -#' of freedom, AIC and number of iterations IRLS takes. 
+#' including at least the coefficients matrix (which includes coefficients, standard error +#' of coefficients, t value and p value), null/residual deviance, null/residual degrees of +#' freedom, AIC and number of iterations IRLS takes. If there are collinear columns +#' in you data, the coefficients matrix only provides coefficients. #' #' @rdname spark.glm #' @export @@ -303,9 +305,18 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), } else { dataFrame(callJMethod(jobj, "rDevianceResiduals")) } - coefficients <- matrix(coefficients, ncol = 4) - colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") - rownames(coefficients) <- unlist(features) + # If the underlying WeightedLeastSquares using "normal" solver, we can provide + # coefficients, standard error of coefficients, t value and p value. Otherwise, + # it will be fitted by local "l-bfgs", we can only provide coefficients. + if (length(features) == length(coefficients)) { + coefficients <- matrix(coefficients, ncol = 1) + colnames(coefficients) <- c("Estimate") + rownames(coefficients) <- unlist(features) + } else { + coefficients <- matrix(coefficients, ncol = 4) + colnames(coefficients) <- c("Estimate", "Std. Error", "t value", "Pr(>|t|)") + rownames(coefficients) <- unlist(features) + } ans <- list(deviance.resid = deviance.resid, coefficients = coefficients, dispersion = dispersion, null.deviance = null.deviance, deviance = deviance, df.null = df.null, df.residual = df.residual, diff --git a/R/pkg/inst/tests/testthat/test_mllib.R b/R/pkg/inst/tests/testthat/test_mllib.R index 70a033de5308e..b05be476a3fa8 100644 --- a/R/pkg/inst/tests/testthat/test_mllib.R +++ b/R/pkg/inst/tests/testthat/test_mllib.R @@ -169,6 +169,15 @@ test_that("spark.glm summary", { df <- suppressWarnings(createDataFrame(data)) regStats <- summary(spark.glm(df, b ~ a1 + a2, regParam = 1.0)) expect_equal(regStats$aic, 14.00976, tolerance = 1e-4) # 14.00976 is from summary() result + + # Test spark.glm works on collinear data + A <- matrix(c(1, 2, 3, 4, 2, 4, 6, 8), 4, 2) + b <- c(1, 2, 3, 4) + data <- as.data.frame(cbind(A, b)) + df <- createDataFrame(data) + stats <- summary(spark.glm(df, b ~ . 
- 1)) + coefs <- unlist(stats$coefficients) + expect_true(all(abs(c(0.5, 0.25) - coefs) < 1e-4)) }) test_that("spark.glm save/load", { diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala index add4d49110d16..8bcc9fe5d1b85 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/GeneralizedLinearRegressionWrapper.scala @@ -144,30 +144,38 @@ private[r] object GeneralizedLinearRegressionWrapper features } - val rCoefficientStandardErrors = if (glm.getFitIntercept) { - Array(summary.coefficientStandardErrors.last) ++ - summary.coefficientStandardErrors.dropRight(1) + val rCoefficients: Array[Double] = if (summary.isNormalSolver) { + val rCoefficientStandardErrors = if (glm.getFitIntercept) { + Array(summary.coefficientStandardErrors.last) ++ + summary.coefficientStandardErrors.dropRight(1) + } else { + summary.coefficientStandardErrors + } + + val rTValues = if (glm.getFitIntercept) { + Array(summary.tValues.last) ++ summary.tValues.dropRight(1) + } else { + summary.tValues + } + + val rPValues = if (glm.getFitIntercept) { + Array(summary.pValues.last) ++ summary.pValues.dropRight(1) + } else { + summary.pValues + } + + if (glm.getFitIntercept) { + Array(glm.intercept) ++ glm.coefficients.toArray ++ + rCoefficientStandardErrors ++ rTValues ++ rPValues + } else { + glm.coefficients.toArray ++ rCoefficientStandardErrors ++ rTValues ++ rPValues + } } else { - summary.coefficientStandardErrors - } - - val rTValues = if (glm.getFitIntercept) { - Array(summary.tValues.last) ++ summary.tValues.dropRight(1) - } else { - summary.tValues - } - - val rPValues = if (glm.getFitIntercept) { - Array(summary.pValues.last) ++ summary.pValues.dropRight(1) - } else { - summary.pValues - } - - val rCoefficients: Array[Double] = if (glm.getFitIntercept) { - Array(glm.intercept) ++ glm.coefficients.toArray ++ - rCoefficientStandardErrors ++ rTValues ++ rPValues - } else { - glm.coefficients.toArray ++ rCoefficientStandardErrors ++ rTValues ++ rPValues + if (glm.getFitIntercept) { + Array(glm.intercept) ++ glm.coefficients.toArray + } else { + glm.coefficients.toArray + } } val rDispersion: Double = summary.dispersion diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index 3f9de1fe74c9c..f33dd0fd294ba 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1063,45 +1063,75 @@ class GeneralizedLinearRegressionTrainingSummary private[regression] ( import GeneralizedLinearRegression._ + /** + * Whether the underlying [[WeightedLeastSquares]] using the "normal" solver. + */ + private[ml] val isNormalSolver: Boolean = { + diagInvAtWA.length != 1 || diagInvAtWA(0) != 0 + } + /** * Standard error of estimated coefficients and intercept. + * This value is only available when the underlying [[WeightedLeastSquares]] + * using the "normal" solver. * * If [[GeneralizedLinearRegression.fitIntercept]] is set to true, * then the last element returned corresponds to the intercept. 
*/ @Since("2.0.0") lazy val coefficientStandardErrors: Array[Double] = { - diagInvAtWA.map(_ * dispersion).map(math.sqrt) + if (isNormalSolver) { + diagInvAtWA.map(_ * dispersion).map(math.sqrt) + } else { + throw new UnsupportedOperationException( + "No Std. Error of coefficients available for this GeneralizedLinearRegressionModel") + } } /** * T-statistic of estimated coefficients and intercept. + * This value is only available when the underlying [[WeightedLeastSquares]] + * using the "normal" solver. * * If [[GeneralizedLinearRegression.fitIntercept]] is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") lazy val tValues: Array[Double] = { - val estimate = if (model.getFitIntercept) { - Array.concat(model.coefficients.toArray, Array(model.intercept)) + if (isNormalSolver) { + val estimate = if (model.getFitIntercept) { + Array.concat(model.coefficients.toArray, Array(model.intercept)) + } else { + model.coefficients.toArray + } + estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } } else { - model.coefficients.toArray + throw new UnsupportedOperationException( + "No t-statistic available for this GeneralizedLinearRegressionModel") } - estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } } /** * Two-sided p-value of estimated coefficients and intercept. + * This value is only available when the underlying [[WeightedLeastSquares]] + * using the "normal" solver. * * If [[GeneralizedLinearRegression.fitIntercept]] is set to true, * then the last element returned corresponds to the intercept. */ @Since("2.0.0") lazy val pValues: Array[Double] = { - if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) { - tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) } + if (isNormalSolver) { + if (model.getFamily == Binomial.name || model.getFamily == Poisson.name) { + tValues.map { x => 2.0 * (1.0 - dist.Gaussian(0.0, 1.0).cdf(math.abs(x))) } + } else { + tValues.map { x => + 2.0 * (1.0 - dist.StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) + } + } } else { - tValues.map { x => 2.0 * (1.0 - dist.StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) } + throw new UnsupportedOperationException( + "No p-value available for this GeneralizedLinearRegressionModel") } } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 9b0fa67630d2e..4fab2160339c6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -1048,6 +1048,27 @@ class GeneralizedLinearRegressionSuite assert(summary.solver === "irls") } + test("glm handle collinear features") { + val collinearInstances = Seq( + Instance(1.0, 1.0, Vectors.dense(1.0, 2.0)), + Instance(2.0, 1.0, Vectors.dense(2.0, 4.0)), + Instance(3.0, 1.0, Vectors.dense(3.0, 6.0)), + Instance(4.0, 1.0, Vectors.dense(4.0, 8.0)) + ).toDF() + val trainer = new GeneralizedLinearRegression() + val model = trainer.fit(collinearInstances) + // to make it clear that underlying WLS did not solve analytically + intercept[UnsupportedOperationException] { + model.summary.coefficientStandardErrors + } + intercept[UnsupportedOperationException] { + model.summary.pValues + } + intercept[UnsupportedOperationException] { + model.summary.tValues + } + } + test("read/write") { def 
checkModelData( model: GeneralizedLinearRegressionModel, From fabb5aeaf62e5c18d5d489e769e998e52379ba20 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 22 Nov 2016 22:25:27 -0800 Subject: [PATCH 0181/1204] [SPARK-18179][SQL] Throws analysis exception with a proper message for unsupported argument types in reflect/java_method function ## What changes were proposed in this pull request? This PR proposes throwing an `AnalysisException` with a proper message rather than `NoSuchElementException` with the message ` key not found: TimestampType` when unsupported types are given to `reflect` and `java_method` functions. ```scala spark.range(1).selectExpr("reflect('java.lang.String', 'valueOf', cast('1990-01-01' as timestamp))") ``` produces **Before** ``` java.util.NoSuchElementException: key not found: TimestampType at scala.collection.MapLike$class.default(MapLike.scala:228) at scala.collection.AbstractMap.default(Map.scala:59) at scala.collection.MapLike$class.apply(MapLike.scala:141) at scala.collection.AbstractMap.apply(Map.scala:59) at org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection$$anonfun$findMethod$1$$anonfun$apply$1.apply(CallMethodViaReflection.scala:159) ... ``` **After** ``` cannot resolve 'reflect('java.lang.String', 'valueOf', CAST('1990-01-01' AS TIMESTAMP))' due to data type mismatch: arguments from the third require boolean, byte, short, integer, long, float, double or string expressions; line 1 pos 0; 'Project [unresolvedalias(reflect(java.lang.String, valueOf, cast(1990-01-01 as timestamp)), Some())] +- Range (0, 1, step=1, splits=Some(2)) ... ``` Added message is, ``` arguments from the third require boolean, byte, short, integer, long, float, double or string expressions ``` ## How was this patch tested? Tests added in `CallMethodViaReflection`. Author: hyukjinkwon Closes #15694 from HyukjinKwon/SPARK-18179. 
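As a quick illustration of the argument-type rule described above (an editor's sketch, not part of the patch; it assumes spark-shell, where `spark` is predefined):

```scala
// Editor's illustration only. Arguments after the class and method names must be
// boolean, byte, short, integer, long, float, double or string expressions.
// This call uses only string arguments, so it resolves and runs:
spark.sql(
  "SELECT reflect('java.util.UUID', 'fromString', 'a5cf6c01-b7a4-4571-af98-da979d73a0f2')"
).show(false)

// This call passes a timestamp argument and now fails analysis with the clearer
// message quoted in the description above:
// spark.sql("SELECT reflect('java.lang.String', 'valueOf', cast('1990-01-01' as timestamp))")
```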
(cherry picked from commit 2559fb4b40c9f42f7b3ed2b77de14461f68b6fa5) Signed-off-by: Reynold Xin --- .../catalyst/expressions/CallMethodViaReflection.scala | 4 ++++ .../expressions/CallMethodViaReflectionSuite.scala | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index 40f1b148f9287..4859e0c537610 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -65,6 +65,10 @@ case class CallMethodViaReflection(children: Seq[Expression]) TypeCheckFailure("first two arguments should be string literals") } else if (!classExists) { TypeCheckFailure(s"class $className not found") + } else if (children.slice(2, children.length) + .exists(e => !CallMethodViaReflection.typeMapping.contains(e.dataType))) { + TypeCheckFailure("arguments from the third require boolean, byte, short, " + + "integer, long, float, double or string expressions") } else if (method == null) { TypeCheckFailure(s"cannot find a static method that matches the argument types in $className") } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala index 43367c7e14c34..88d4d460751b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflectionSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.expressions +import java.sql.Timestamp + import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.types.{IntegerType, StringType} @@ -85,6 +87,13 @@ class CallMethodViaReflectionSuite extends SparkFunSuite with ExpressionEvalHelp assert(createExpr(staticClassName, "method1").checkInputDataTypes().isSuccess) } + test("unsupported type checking") { + val ret = createExpr(staticClassName, "method1", new Timestamp(1)).checkInputDataTypes() + assert(ret.isFailure) + val errorMsg = ret.asInstanceOf[TypeCheckFailure].message + assert(errorMsg.contains("arguments from the third require boolean, byte, short")) + } + test("invoking methods using acceptable types") { checkEvaluation(createExpr(staticClassName, "method1"), "m1") checkEvaluation(createExpr(staticClassName, "method2", 2), "m2") From 5f198d200d47703f6ab770e592c0a1d9f8d7b0dc Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 23 Nov 2016 11:25:47 +0000 Subject: [PATCH 0182/1204] [SPARK-18073][DOCS][WIP] Migrate wiki to spark.apache.org web site ## What changes were proposed in this pull request? Updates links to the wiki to links to the new location of content on spark.apache.org. ## How was this patch tested? Doc builds Author: Sean Owen Closes #15967 from srowen/SPARK-18073.1. 
(cherry picked from commit 7e0cd1d9b168286386f15e9b55988733476ae2bb) Signed-off-by: Sean Owen --- .github/PULL_REQUEST_TEMPLATE | 2 +- CONTRIBUTING.md | 4 ++-- R/README.md | 2 +- R/pkg/DESCRIPTION | 2 +- README.md | 11 ++++++----- dev/checkstyle.xml | 2 +- docs/_layouts/global.html | 4 ++-- docs/building-spark.md | 4 ++-- docs/contributing-to-spark.md | 2 +- docs/index.md | 4 ++-- docs/sparkr.md | 2 +- docs/streaming-programming-guide.md | 2 +- .../spark/sql/execution/datasources/DataSource.scala | 5 ++--- 13 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index 0e41cf1826453..5af45d6fa7988 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -7,4 +7,4 @@ (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) -Please review https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark before opening a pull request. +Please review http://spark.apache.org/contributing.html before opening a pull request. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1a8206abe3838..8fdd5aa9e7dfb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,12 +1,12 @@ ## Contributing to Spark *Before opening a pull request*, review the -[Contributing to Spark wiki](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark). +[Contributing to Spark guide](http://spark.apache.org/contributing.html). It lists steps that are required before creating a PR. In particular, consider: - Is the change important and ready enough to ask the community to spend time reviewing? - Have you searched for existing, related JIRAs and pull requests? -- Is this a new feature that can stand alone as a [third party project](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects) ? +- Is this a new feature that can stand alone as a [third party project](http://spark.apache.org/third-party-projects.html) ? - Is the change being proposed clearly explained and motivated? When you contribute code, you affirm that the contribution is your original work and that you diff --git a/R/README.md b/R/README.md index 47f9a86dfde11..4c40c5963db70 100644 --- a/R/README.md +++ b/R/README.md @@ -51,7 +51,7 @@ sparkR.session() #### Making changes to SparkR -The [instructions](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) for making contributions to Spark also apply to SparkR. +The [instructions](http://spark.apache.org/contributing.html) for making contributions to Spark also apply to SparkR. If you only make R file changes (i.e. no Scala changes) then you can just re-install the R package using `R/install-dev.sh` and test your changes. Once you have made your changes, please include unit tests for them and run existing unit tests using the `R/run-tests.sh` script as described below. 
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index fe41a9e7dabbd..981ae1246476b 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixcheung@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) URL: http://www.apache.org/ http://spark.apache.org/ -BugReports: https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingBugReports +BugReports: http://spark.apache.org/contributing.html Depends: R (>= 3.0), methods diff --git a/README.md b/README.md index dd7d0e22495b3..853f7f5ded3cb 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,9 @@ To build Spark and its example programs, run: You can build Spark using more than one thread by using the -T option with Maven, see ["Parallel builds in Maven 3"](https://cwiki.apache.org/confluence/display/MAVEN/Parallel+builds+in+Maven+3). More detailed documentation is available from the project site, at ["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html). -For developing Spark using an IDE, see [Eclipse](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-Eclipse) -and [IntelliJ](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-IntelliJ). + +For general development tips, including info on developing Spark using an IDE, see +[http://spark.apache.org/developer-tools.html](the Useful Developer Tools page). ## Interactive Scala Shell @@ -80,7 +81,7 @@ can be run using: ./dev/run-tests Please see the guidance on how to -[run tests for a module, or individual tests](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools). +[run tests for a module, or individual tests](http://spark.apache.org/developer-tools.html#individual-tests). ## A Note About Hadoop Versions @@ -100,5 +101,5 @@ in the online documentation for an overview on how to configure Spark. ## Contributing -Please review the [Contribution to Spark](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) -wiki for information on how to get started contributing to the project. +Please review the [Contribution to Spark guide](http://spark.apache.org/contributing.html) +for information on how to get started contributing to the project. diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 92c5251c85037..fd73ca73ee7ef 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -28,7 +28,7 @@ with Spark-specific changes from: - https://cwiki.apache.org/confluence/display/SPARK/Spark+Code+Style+Guide + http://spark.apache.org/contributing.html#code-style-guide Checkstyle is very configurable. Be sure to read the documentation at http://checkstyle.sf.net (or in your downloaded distribution). diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index ad5b5c9adfac8..c00d0db63cd10 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -113,8 +113,8 @@
                                 <li><a href="hardware-provisioning.html">Hardware Provisioning</a></li>
                                 <li><a href="building-spark.html">Building Spark</a></li>
-                                <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark">Contributing to Spark</a></li>
-                                <li><a href="https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects">Third Party Projects</a></li>
+                                <li><a href="http://spark.apache.org/contributing.html">Contributing to Spark</a></li>
+                                <li><a href="http://spark.apache.org/third-party-projects.html">Third Party Projects</a></li>
  • diff --git a/docs/building-spark.md b/docs/building-spark.md index 88da0cc9c3bbf..65c2895b29b10 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -197,7 +197,7 @@ can be set to control the SBT build. For example: To avoid the overhead of launching sbt each time you need to re-compile, you can launch sbt in interactive mode by running `build/sbt`, and then run all build commands at the command prompt. For more recommendations on reducing build time, refer to the -[wiki page](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-ReducingBuildTimes). +[Useful Developer Tools page](http://spark.apache.org/developer-tools.html). ## Encrypted Filesystems @@ -215,7 +215,7 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/ ## IntelliJ IDEA or Eclipse For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the -[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-IDESetup). +[Useful Developer Tools page](http://spark.apache.org/developer-tools.html). # Running Tests diff --git a/docs/contributing-to-spark.md b/docs/contributing-to-spark.md index ef1b3ad6da57a..9252545e4a129 100644 --- a/docs/contributing-to-spark.md +++ b/docs/contributing-to-spark.md @@ -5,4 +5,4 @@ title: Contributing to Spark The Spark team welcomes all forms of contributions, including bug reports, documentation or patches. For the newest information on how to contribute to the project, please read the -[wiki page on contributing to Spark](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark). +[Contributing to Spark guide](http://spark.apache.org/contributing.html). diff --git a/docs/index.md b/docs/index.md index 39de11de854a7..c5d34cb5c4e73 100644 --- a/docs/index.md +++ b/docs/index.md @@ -125,8 +125,8 @@ options for deployment: * Integration with other storage systems: * [OpenStack Swift](storage-openstack-swift.html) * [Building Spark](building-spark.html): build Spark using the Maven system -* [Contributing to Spark](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) -* [Third Party Projects](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects): related third party Spark projects +* [Contributing to Spark](http://spark.apache.org/contributing.html) +* [Third Party Projects](http://spark.apache.org/third-party-projects.html): related third party Spark projects **External Resources:** diff --git a/docs/sparkr.md b/docs/sparkr.md index f30bd4026fed3..d26949226b117 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -126,7 +126,7 @@ head(df) SparkR supports operating on a variety of data sources through the `SparkDataFrame` interface. This section describes the general methods for loading and saving data using Data Sources. You can check the Spark SQL programming guide for more [specific options](sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources. The general method for creating SparkDataFrames from data sources is `read.df`. This method takes in the path for the file to load and the type of data source, and the currently active SparkSession will be used automatically. 
-SparkR supports reading JSON, CSV and Parquet files natively, and through packages available from sources like [Third Party Projects](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects), you can find data source connectors for popular file formats like Avro. These packages can either be added by +SparkR supports reading JSON, CSV and Parquet files natively, and through packages available from sources like [Third Party Projects](http://spark.apache.org/third-party-projects.html), you can find data source connectors for popular file formats like Avro. These packages can either be added by specifying `--packages` with `spark-submit` or `sparkR` commands, or if initializing SparkSession with `sparkPackages` parameter when in an interactive R shell or from RStudio.
    diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 18fc1cd934826..1fcd198685a51 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -2382,7 +2382,7 @@ additional effort may be necessary to achieve exactly-once semantics. There are - [Kafka Integration Guide](streaming-kafka-integration.html) - [Kinesis Integration Guide](streaming-kinesis-integration.html) - [Custom Receiver Guide](streaming-custom-receivers.html) -* Third-party DStream data sources can be found in [Third Party Projects](https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects) +* Third-party DStream data sources can be found in [Third Party Projects](http://spark.apache.org/third-party-projects.html) * API documentation - Scala docs * [StreamingContext](api/scala/index.html#org.apache.spark.streaming.StreamingContext) and diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index cfee7be1e3f07..84fde0bbf9268 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -505,12 +505,11 @@ object DataSource { provider1 == "com.databricks.spark.avro") { throw new AnalysisException( s"Failed to find data source: ${provider1.toLowerCase}. Please find an Avro " + - "package at " + - "https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects") + "package at http://spark.apache.org/third-party-projects.html") } else { throw new ClassNotFoundException( s"Failed to find data source: $provider1. Please find packages at " + - "https://cwiki.apache.org/confluence/display/SPARK/Third+Party+Projects", + "http://spark.apache.org/third-party-projects.html", error) } } From ebeb051405b84cb4abafbb6929ddcfadf59672db Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 23 Nov 2016 04:15:19 -0800 Subject: [PATCH 0183/1204] [SPARK-18053][SQL] compare unsafe and safe complex-type values correctly ## What changes were proposed in this pull request? In Spark SQL, some expression may output safe format values, e.g. `CreateArray`, `CreateStruct`, `Cast`, etc. When we compare 2 values, we should be able to compare safe and unsafe formats. The `GreaterThan`, `LessThan`, etc. in Spark SQL already handles it, but the `EqualTo` doesn't. This PR fixes it. ## How was this patch tested? new unit test and regression test Author: Wenchen Fan Closes #15929 from cloud-fan/type-aware. 
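For illustration only (not part of the patch): a minimal Scala sketch of the kind of query this change affects, mirroring the regression test added to SQLQuerySuite below. The table name `array_tbl` is just an example.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.array

val spark = SparkSession.builder().appName("equalto-complex-types").getOrCreate()
import spark.implicits._

// The stored `arr` column is produced in the unsafe (binary) format.
spark.range(10).select(array($"id").as("arr")).write.saveAsTable("array_tbl")

// The literal ARRAY(1L) is built in the safe format; before this fix, EqualTo
// could not compare the two representations and gave incorrect results.
spark.sql("SELECT * FROM array_tbl WHERE arr = ARRAY(1L)").show()
```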
(cherry picked from commit 84284e8c82542d80dad94e458a0c0210bf803db3) Signed-off-by: Herman van Hovell --- .../sql/catalyst/expressions/UnsafeRow.java | 6 +--- .../expressions/codegen/CodeGenerator.scala | 20 ++++++++++-- .../sql/catalyst/expressions/predicates.scala | 32 +++---------------- .../catalyst/expressions/PredicateSuite.scala | 29 +++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 7 ++++ 5 files changed, 59 insertions(+), 35 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index c3f0abac244cf..d205547698c5b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -578,12 +578,8 @@ public boolean equals(Object other) { return (sizeInBytes == o.sizeInBytes) && ByteArrayMethods.arrayEquals(baseObject, baseOffset, o.baseObject, o.baseOffset, sizeInBytes); - } else if (!(other instanceof InternalRow)) { - return false; - } else { - throw new IllegalArgumentException( - "Cannot compare UnsafeRow to " + other.getClass().getName()); } + return false; } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9c3c6d3b2a7f2..09007b7c89fe3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -481,8 +481,13 @@ class CodegenContext { case FloatType => s"(java.lang.Float.isNaN($c1) && java.lang.Float.isNaN($c2)) || $c1 == $c2" case DoubleType => s"(java.lang.Double.isNaN($c1) && java.lang.Double.isNaN($c2)) || $c1 == $c2" case dt: DataType if isPrimitiveType(dt) => s"$c1 == $c2" + case dt: DataType if dt.isInstanceOf[AtomicType] => s"$c1.equals($c2)" + case array: ArrayType => genComp(array, c1, c2) + " == 0" + case struct: StructType => genComp(struct, c1, c2) + " == 0" case udt: UserDefinedType[_] => genEqual(udt.sqlType, c1, c2) - case other => s"$c1.equals($c2)" + case _ => + throw new IllegalArgumentException( + "cannot generate equality code for un-comparable type: " + dataType.simpleString) } /** @@ -512,6 +517,11 @@ class CodegenContext { val funcCode: String = s""" public int $compareFunc(ArrayData a, ArrayData b) { + // when comparing unsafe arrays, try equals first as it compares the binary directly + // which is very fast. + if (a instanceof UnsafeArrayData && b instanceof UnsafeArrayData && a.equals(b)) { + return 0; + } int lengthA = a.numElements(); int lengthB = b.numElements(); int $minLength = (lengthA > lengthB) ? lengthB : lengthA; @@ -551,6 +561,11 @@ class CodegenContext { val funcCode: String = s""" public int $compareFunc(InternalRow a, InternalRow b) { + // when comparing unsafe rows, try equals first as it compares the binary directly + // which is very fast. 
+ if (a instanceof UnsafeRow && b instanceof UnsafeRow && a.equals(b)) { + return 0; + } InternalRow i = null; $comparisons return 0; @@ -561,7 +576,8 @@ class CodegenContext { case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)" case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2) case _ => - throw new IllegalArgumentException("cannot generate compare code for un-comparable type") + throw new IllegalArgumentException( + "cannot generate compare code for un-comparable type: " + dataType.simpleString) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 2ad452b6a90ca..3fcbb05372d87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -388,6 +388,8 @@ abstract class BinaryComparison extends BinaryOperator with Predicate { defineCodeGen(ctx, ev, (c1, c2) => s"${ctx.genComp(left.dataType, c1, c2)} $symbol 0") } } + + protected lazy val ordering = TypeUtils.getInterpretedOrdering(left.dataType) } @@ -429,17 +431,7 @@ case class EqualTo(left: Expression, right: Expression) override def symbol: String = "=" - protected override def nullSafeEval(input1: Any, input2: Any): Any = { - if (left.dataType == FloatType) { - Utils.nanSafeCompareFloats(input1.asInstanceOf[Float], input2.asInstanceOf[Float]) == 0 - } else if (left.dataType == DoubleType) { - Utils.nanSafeCompareDoubles(input1.asInstanceOf[Double], input2.asInstanceOf[Double]) == 0 - } else if (left.dataType != BinaryType) { - input1 == input2 - } else { - java.util.Arrays.equals(input1.asInstanceOf[Array[Byte]], input2.asInstanceOf[Array[Byte]]) - } - } + protected override def nullSafeEval(left: Any, right: Any): Any = ordering.equiv(left, right) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, (c1, c2) => ctx.genEqual(left.dataType, c1, c2)) @@ -482,15 +474,7 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp } else if (input1 == null || input2 == null) { false } else { - if (left.dataType == FloatType) { - Utils.nanSafeCompareFloats(input1.asInstanceOf[Float], input2.asInstanceOf[Float]) == 0 - } else if (left.dataType == DoubleType) { - Utils.nanSafeCompareDoubles(input1.asInstanceOf[Double], input2.asInstanceOf[Double]) == 0 - } else if (left.dataType != BinaryType) { - input1 == input2 - } else { - java.util.Arrays.equals(input1.asInstanceOf[Array[Byte]], input2.asInstanceOf[Array[Byte]]) - } + ordering.equiv(input1, input2) } } @@ -513,8 +497,6 @@ case class LessThan(left: Expression, right: Expression) override def symbol: String = "<" - private lazy val ordering = TypeUtils.getInterpretedOrdering(left.dataType) - protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2) } @@ -527,8 +509,6 @@ case class LessThanOrEqual(left: Expression, right: Expression) override def symbol: String = "<=" - private lazy val ordering = TypeUtils.getInterpretedOrdering(left.dataType) - protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2) } @@ -541,8 +521,6 @@ case class GreaterThan(left: Expression, right: Expression) override def symbol: String = ">" - private lazy val ordering = TypeUtils.getInterpretedOrdering(left.dataType) - protected override def nullSafeEval(input1: Any, 
input2: Any): Any = ordering.gt(input1, input2) } @@ -555,7 +533,5 @@ case class GreaterThanOrEqual(left: Expression, right: Expression) override def symbol: String = ">=" - private lazy val ordering = TypeUtils.getInterpretedOrdering(left.dataType) - protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gteq(input1, input2) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 2a445b8cdb091..f9f6799e6e72f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -21,6 +21,8 @@ import scala.collection.immutable.HashSet import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ @@ -293,4 +295,31 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(EqualNullSafe(nullInt, normalInt), false) checkEvaluation(EqualNullSafe(nullInt, nullInt), true) } + + test("EqualTo on complex type") { + val array = new GenericArrayData(Array(1, 2, 3)) + val struct = create_row("a", 1L, array) + + val arrayType = ArrayType(IntegerType) + val structType = new StructType() + .add("1", StringType) + .add("2", LongType) + .add("3", ArrayType(IntegerType)) + + val projection = UnsafeProjection.create( + new StructType().add("array", arrayType).add("struct", structType)) + + val unsafeRow = projection(InternalRow(array, struct)) + + val unsafeArray = unsafeRow.getArray(0) + val unsafeStruct = unsafeRow.getStruct(1, 3) + + checkEvaluation(EqualTo( + Literal.create(array, arrayType), + Literal.create(unsafeArray, arrayType)), true) + + checkEvaluation(EqualTo( + Literal.create(struct, structType), + Literal.create(unsafeStruct, structType)), true) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 6b517bc70f7d2..806381008aba6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2476,4 +2476,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-18053: ARRAY equality is broken") { + withTable("array_tbl") { + spark.range(10).select(array($"id").as("arr")).write.saveAsTable("array_tbl") + assert(sql("SELECT * FROM array_tbl where arr = ARRAY(1L)").count == 1) + } + } } From 539c193af7e3e08e9b48df15e94eafcc3532105c Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Wed, 23 Nov 2016 20:14:08 +0800 Subject: [PATCH 0184/1204] [SPARK-18545][SQL] Verify number of hive client RPCs in PartitionedTablePerfStatsSuite ## What changes were proposed in this pull request? This would help catch accidental O(n) calls to the hive client as in https://issues.apache.org/jira/browse/SPARK-18507 ## How was this patch tested? Checked that the test fails before https://issues.apache.org/jira/browse/SPARK-18507 was patched. cc cloud-fan Author: Eric Liang Closes #15985 from ericl/spark-18545. 
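As a rough sketch of the testing pattern this patch adds (using the `HiveCatalogMetrics` counter introduced below; the table name, predicate, and threshold are illustrative, and `spark` is assumed to be an active SparkSession):

```scala
import org.apache.spark.metrics.source.HiveCatalogMetrics

// Reset the catalog metrics, run a partition-pruned query, then check that
// the number of Hive client calls stayed bounded instead of growing with the
// number of partitions in the table.
HiveCatalogMetrics.reset()
spark.sql("SELECT * FROM test WHERE partCol1 = 1").count()
assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() > 0)
assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10)
```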
(cherry picked from commit 85235ed6c600270e3fa434738bd50dce3564440a) Signed-off-by: Wenchen Fan --- .../spark/metrics/source/StaticSources.scala | 7 +++ .../sql/hive/client/HiveClientImpl.scala | 1 + .../hive/PartitionedTablePerfStatsSuite.scala | 58 ++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala index 3f7cfd9d2c11f..b433cd0a89ac9 100644 --- a/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala +++ b/core/src/main/scala/org/apache/spark/metrics/source/StaticSources.scala @@ -85,6 +85,11 @@ object HiveCatalogMetrics extends Source { */ val METRIC_FILE_CACHE_HITS = metricRegistry.counter(MetricRegistry.name("fileCacheHits")) + /** + * Tracks the total number of Hive client calls (e.g. to lookup a table). + */ + val METRIC_HIVE_CLIENT_CALLS = metricRegistry.counter(MetricRegistry.name("hiveClientCalls")) + /** * Resets the values of all metrics to zero. This is useful in tests. */ @@ -92,10 +97,12 @@ object HiveCatalogMetrics extends Source { METRIC_PARTITIONS_FETCHED.dec(METRIC_PARTITIONS_FETCHED.getCount()) METRIC_FILES_DISCOVERED.dec(METRIC_FILES_DISCOVERED.getCount()) METRIC_FILE_CACHE_HITS.dec(METRIC_FILE_CACHE_HITS.getCount()) + METRIC_HIVE_CLIENT_CALLS.dec(METRIC_HIVE_CLIENT_CALLS.getCount()) } // clients can use these to avoid classloader issues with the codahale classes def incrementFetchedPartitions(n: Int): Unit = METRIC_PARTITIONS_FETCHED.inc(n) def incrementFilesDiscovered(n: Int): Unit = METRIC_FILES_DISCOVERED.inc(n) def incrementFileCacheHits(n: Int): Unit = METRIC_FILE_CACHE_HITS.inc(n) + def incrementHiveClientCalls(n: Int): Unit = METRIC_HIVE_CLIENT_CALLS.inc(n) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index daae8523c6366..68dcfd86731bd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -281,6 +281,7 @@ private[hive] class HiveClientImpl( shim.setCurrentSessionState(state) val ret = try f finally { Thread.currentThread().setContextClassLoader(original) + HiveCatalogMetrics.incrementHiveClientCalls(1) } ret } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala index b41bc862e9bc5..9838b9a4eba3d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala @@ -57,7 +57,11 @@ class PartitionedTablePerfStatsSuite } private def setupPartitionedHiveTable(tableName: String, dir: File): Unit = { - spark.range(5).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write + setupPartitionedHiveTable(tableName, dir, 5) + } + + private def setupPartitionedHiveTable(tableName: String, dir: File, scale: Int): Unit = { + spark.range(scale).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write .partitionBy("partCol1", "partCol2") .mode("overwrite") .parquet(dir.getAbsolutePath) @@ -71,7 +75,11 @@ class PartitionedTablePerfStatsSuite } private def setupPartitionedDatasourceTable(tableName: String, dir: File): Unit = { - 
spark.range(5).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write + setupPartitionedDatasourceTable(tableName, dir, 5) + } + + private def setupPartitionedDatasourceTable(tableName: String, dir: File, scale: Int): Unit = { + spark.range(scale).selectExpr("id as fieldOne", "id as partCol1", "id as partCol2").write .partitionBy("partCol1", "partCol2") .mode("overwrite") .parquet(dir.getAbsolutePath) @@ -242,6 +250,52 @@ class PartitionedTablePerfStatsSuite } } + test("hive table: num hive client calls does not scale with partition count") { + withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") { + withTable("test") { + withTempDir { dir => + setupPartitionedHiveTable("test", dir, scale = 100) + + HiveCatalogMetrics.reset() + assert(spark.sql("select * from test where partCol1 = 1").count() == 1) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() > 0) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + + HiveCatalogMetrics.reset() + assert(spark.sql("select * from test").count() == 100) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + + HiveCatalogMetrics.reset() + assert(spark.sql("show partitions test").count() == 100) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + } + } + } + } + + test("datasource table: num hive client calls does not scale with partition count") { + withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") { + withTable("test") { + withTempDir { dir => + setupPartitionedDatasourceTable("test", dir, scale = 100) + + HiveCatalogMetrics.reset() + assert(spark.sql("select * from test where partCol1 = 1").count() == 1) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() > 0) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + + HiveCatalogMetrics.reset() + assert(spark.sql("select * from test").count() == 100) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + + HiveCatalogMetrics.reset() + assert(spark.sql("show partitions test").count() == 100) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + } + } + } + } + test("hive table: files read and cached when filesource partition management is off") { withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { withTable("test") { From e11d7c6874debfbbe44be4a2b0983d6b6763fff8 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Nov 2016 04:22:26 -0800 Subject: [PATCH 0185/1204] [SPARK-18557] Downgrade confusing memory leak warning message ## What changes were proposed in this pull request? TaskMemoryManager has a memory leak detector that gets called at task completion callback and checks whether any memory has not been released. If they are not released by the time the callback is invoked, TaskMemoryManager releases them. The current error message says something like the following: ``` WARN [Executor task launch worker-0] org.apache.spark.memory.TaskMemoryManager - leak 16.3 MB memory from org.apache.spark.unsafe.map.BytesToBytesMap33fb6a15 In practice, there are multiple reasons why these can be triggered in the normal code path (e.g. limit, or task failures), and the fact that these messages are log means the "leak" is fixed by TaskMemoryManager. ``` To not confuse users, this patch downgrade the message from warning to debug level, and avoids using the word "leak" since it is not actually a leak. ## How was this patch tested? N/A - this is a simple logging improvement. 
Author: Reynold Xin Closes #15989 from rxin/SPARK-18557. (cherry picked from commit 9785ed40d7fe4e1fcd440e55706519c6e5f8d6b1) Signed-off-by: Herman van Hovell --- .../main/java/org/apache/spark/memory/TaskMemoryManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java index 1a700aa37554e..c40974b54cb47 100644 --- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java +++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java @@ -378,14 +378,14 @@ public long cleanUpAllAllocatedMemory() { for (MemoryConsumer c: consumers) { if (c != null && c.getUsed() > 0) { // In case of failed task, it's normal to see leaked memory - logger.warn("leak " + Utils.bytesToString(c.getUsed()) + " memory from " + c); + logger.debug("unreleased " + Utils.bytesToString(c.getUsed()) + " memory from " + c); } } consumers.clear(); for (MemoryBlock page : pageTable) { if (page != null) { - logger.warn("leak a page: " + page + " in task " + taskAttemptId); + logger.debug("unreleased page: " + page + " in task " + taskAttemptId); memoryManager.tungstenMemoryAllocator().free(page); } } From 599dac1594ed52934dd483e12d2e39d514793dd9 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Wed, 23 Nov 2016 20:48:41 +0800 Subject: [PATCH 0186/1204] [SPARK-18522][SQL] Explicit contract for column stats serialization ## What changes were proposed in this pull request? The current implementation of column stats uses the base64 encoding of the internal UnsafeRow format to persist statistics (in table properties in Hive metastore). This is an internal format that is not stable across different versions of Spark and should NOT be used for persistence. In addition, it would be better if statistics stored in the catalog is human readable. This pull request introduces the following changes: 1. Created a single ColumnStat class to for all data types. All data types track the same set of statistics. 2. Updated the implementation for stats collection to get rid of the dependency on internal data structures (e.g. InternalRow, or storing DateType as an int32). For example, previously dates were stored as a single integer, but are now stored as java.sql.Date. When we implement the next steps of CBO, we can add code to convert those back into internal types again. 3. Documented clearly what JVM data types are being used to store what data. 4. Defined a simple Map[String, String] interface for serializing and deserializing column stats into/from the catalog. 5. Rearranged the method/function structure so it is more clear what the supported data types are, and also moved how stats are generated into ColumnStat class so they are easy to find. ## How was this patch tested? Removed most of the original test cases created for column statistics, and added three very simple ones to cover all the cases. The three test cases validate: 1. Roundtrip serialization works. 2. Behavior when analyzing non-existent column or unsupported data type column. 3. Result for stats collection for all valid data types. Also moved parser related tests into a parser test suite and added an explicit serialization test for the Hive external catalog. Author: Reynold Xin Closes #15959 from rxin/SPARK-18522. 
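To make the new serialization contract concrete, here is a small sketch (not part of the patch) of the round trip through the `ColumnStat.toMap` / `ColumnStat.fromMap` interface defined below; the table name, column name, and statistics values are made up.

```scala
import org.apache.spark.sql.catalyst.plans.logical.ColumnStat
import org.apache.spark.sql.types.{LongType, StructField}

// Column statistics for a nullable bigint column with two distinct values.
val stat = ColumnStat(
  distinctCount = 2,
  min = Some(1L),
  max = Some(5L),
  nullCount = 1,
  avgLen = 8,
  maxLen = 8)

// Serialized into human-readable strings for the catalog, e.g.
// Map("version" -> "1", "distinctCount" -> "2", "min" -> "1", "max" -> "5",
//     "nullCount" -> "1", "avgLen" -> "8", "maxLen" -> "8")
val serialized: Map[String, String] = stat.toMap

// Deserialization needs the column's schema so values can be parsed back
// into the right external type.
val restored = ColumnStat.fromMap("some_table", StructField("clong", LongType), serialized)
assert(restored == Some(stat))
```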
(cherry picked from commit 70ad07a9d20586ae182c4e60ed97bdddbcbceff3) Signed-off-by: Wenchen Fan --- .../catalyst/plans/logical/Statistics.scala | 212 ++++++++--- .../command/AnalyzeColumnCommand.scala | 105 +----- .../spark/sql/StatisticsCollectionSuite.scala | 218 ++++++++++++ .../spark/sql/StatisticsColumnSuite.scala | 334 ------------------ .../apache/spark/sql/StatisticsSuite.scala | 92 ----- .../org/apache/spark/sql/StatisticsTest.scala | 130 ------- .../sql/execution/SparkSqlParserSuite.scala | 26 +- .../spark/sql/hive/HiveExternalCatalog.scala | 93 +++-- .../spark/sql/hive/StatisticsSuite.scala | 299 ++++++---------- 9 files changed, 591 insertions(+), 918 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/StatisticsSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index f3e2147b8f974..79865609cb647 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.commons.codec.binary.Base64 +import scala.util.control.NonFatal -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.types._ + /** * Estimates of various statistics. The default estimation logic simply lazily multiplies the * corresponding statistic produced by the children. To override this behavior, override @@ -58,60 +61,175 @@ case class Statistics( } } + /** - * Statistics for a column. + * Statistics collected for a column. + * + * 1. Supported data types are defined in `ColumnStat.supportsType`. + * 2. The JVM data type stored in min/max is the external data type (used in Row) for the + * corresponding Catalyst data type. For example, for DateType we store java.sql.Date, and for + * TimestampType we store java.sql.Timestamp. + * 3. For integral types, they are all upcasted to longs, i.e. shorts are stored as longs. + * 4. There is no guarantee that the statistics collected are accurate. Approximation algorithms + * (sketches) might have been used, and the data collected can also be stale. + * + * @param distinctCount number of distinct values + * @param min minimum value + * @param max maximum value + * @param nullCount number of nulls + * @param avgLen average length of the values. For fixed-length types, this should be a constant. + * @param maxLen maximum length of the values. For fixed-length types, this should be a constant. 
*/ -case class ColumnStat(statRow: InternalRow) { +case class ColumnStat( + distinctCount: BigInt, + min: Option[Any], + max: Option[Any], + nullCount: BigInt, + avgLen: Long, + maxLen: Long) { - def forNumeric[T <: AtomicType](dataType: T): NumericColumnStat[T] = { - NumericColumnStat(statRow, dataType) - } - def forString: StringColumnStat = StringColumnStat(statRow) - def forBinary: BinaryColumnStat = BinaryColumnStat(statRow) - def forBoolean: BooleanColumnStat = BooleanColumnStat(statRow) + // We currently don't store min/max for binary/string type. This can change in the future and + // then we need to remove this require. + require(min.isEmpty || (!min.get.isInstanceOf[Array[Byte]] && !min.get.isInstanceOf[String])) + require(max.isEmpty || (!max.get.isInstanceOf[Array[Byte]] && !max.get.isInstanceOf[String])) - override def toString: String = { - // use Base64 for encoding - Base64.encodeBase64String(statRow.asInstanceOf[UnsafeRow].getBytes) + /** + * Returns a map from string to string that can be used to serialize the column stats. + * The key is the name of the field (e.g. "distinctCount" or "min"), and the value is the string + * representation for the value. The deserialization side is defined in [[ColumnStat.fromMap]]. + * + * As part of the protocol, the returned map always contains a key called "version". + * In the case min/max values are null (None), they won't appear in the map. + */ + def toMap: Map[String, String] = { + val map = new scala.collection.mutable.HashMap[String, String] + map.put(ColumnStat.KEY_VERSION, "1") + map.put(ColumnStat.KEY_DISTINCT_COUNT, distinctCount.toString) + map.put(ColumnStat.KEY_NULL_COUNT, nullCount.toString) + map.put(ColumnStat.KEY_AVG_LEN, avgLen.toString) + map.put(ColumnStat.KEY_MAX_LEN, maxLen.toString) + min.foreach { v => map.put(ColumnStat.KEY_MIN_VALUE, v.toString) } + max.foreach { v => map.put(ColumnStat.KEY_MAX_VALUE, v.toString) } + map.toMap } } -object ColumnStat { - def apply(numFields: Int, str: String): ColumnStat = { - // use Base64 for decoding - val bytes = Base64.decodeBase64(str) - val unsafeRow = new UnsafeRow(numFields) - unsafeRow.pointTo(bytes, bytes.length) - ColumnStat(unsafeRow) + +object ColumnStat extends Logging { + + // List of string keys used to serialize ColumnStat + val KEY_VERSION = "version" + private val KEY_DISTINCT_COUNT = "distinctCount" + private val KEY_MIN_VALUE = "min" + private val KEY_MAX_VALUE = "max" + private val KEY_NULL_COUNT = "nullCount" + private val KEY_AVG_LEN = "avgLen" + private val KEY_MAX_LEN = "maxLen" + + /** Returns true iff the we support gathering column statistics on column of the given type. */ + def supportsType(dataType: DataType): Boolean = dataType match { + case _: IntegralType => true + case _: DecimalType => true + case DoubleType | FloatType => true + case BooleanType => true + case DateType => true + case TimestampType => true + case BinaryType | StringType => true + case _ => false } -} -case class NumericColumnStat[T <: AtomicType](statRow: InternalRow, dataType: T) { - // The indices here must be consistent with `ColumnStatStruct.numericColumnStat`. - val numNulls: Long = statRow.getLong(0) - val max: T#InternalType = statRow.get(1, dataType).asInstanceOf[T#InternalType] - val min: T#InternalType = statRow.get(2, dataType).asInstanceOf[T#InternalType] - val ndv: Long = statRow.getLong(3) -} + /** + * Creates a [[ColumnStat]] object from the given map. This is used to deserialize column stats + * from some external storage. 
The serialization side is defined in [[ColumnStat.toMap]]. + */ + def fromMap(table: String, field: StructField, map: Map[String, String]) + : Option[ColumnStat] = { + val str2val: (String => Any) = field.dataType match { + case _: IntegralType => _.toLong + case _: DecimalType => new java.math.BigDecimal(_) + case DoubleType | FloatType => _.toDouble + case BooleanType => _.toBoolean + case DateType => java.sql.Date.valueOf + case TimestampType => java.sql.Timestamp.valueOf + // This version of Spark does not use min/max for binary/string types so we ignore it. + case BinaryType | StringType => _ => null + case _ => + throw new AnalysisException("Column statistics deserialization is not supported for " + + s"column ${field.name} of data type: ${field.dataType}.") + } -case class StringColumnStat(statRow: InternalRow) { - // The indices here must be consistent with `ColumnStatStruct.stringColumnStat`. - val numNulls: Long = statRow.getLong(0) - val avgColLen: Double = statRow.getDouble(1) - val maxColLen: Long = statRow.getInt(2) - val ndv: Long = statRow.getLong(3) -} + try { + Some(ColumnStat( + distinctCount = BigInt(map(KEY_DISTINCT_COUNT).toLong), + // Note that flatMap(Option.apply) turns Option(null) into None. + min = map.get(KEY_MIN_VALUE).map(str2val).flatMap(Option.apply), + max = map.get(KEY_MAX_VALUE).map(str2val).flatMap(Option.apply), + nullCount = BigInt(map(KEY_NULL_COUNT).toLong), + avgLen = map.getOrElse(KEY_AVG_LEN, field.dataType.defaultSize.toString).toLong, + maxLen = map.getOrElse(KEY_MAX_LEN, field.dataType.defaultSize.toString).toLong + )) + } catch { + case NonFatal(e) => + logWarning(s"Failed to parse column statistics for column ${field.name} in table $table", e) + None + } + } -case class BinaryColumnStat(statRow: InternalRow) { - // The indices here must be consistent with `ColumnStatStruct.binaryColumnStat`. - val numNulls: Long = statRow.getLong(0) - val avgColLen: Double = statRow.getDouble(1) - val maxColLen: Long = statRow.getInt(2) -} + /** + * Constructs an expression to compute column statistics for a given column. + * + * The expression should create a single struct column with the following schema: + * distinctCount: Long, min: T, max: T, nullCount: Long, avgLen: Long, maxLen: Long + * + * Together with [[rowToColumnStat]], this function is used to create [[ColumnStat]] and + * as a result should stay in sync with it. + */ + def statExprs(col: Attribute, relativeSD: Double): CreateNamedStruct = { + def struct(exprs: Expression*): CreateNamedStruct = CreateStruct(exprs.map { expr => + expr.transformUp { case af: AggregateFunction => af.toAggregateExpression() } + }) + val one = Literal(1, LongType) + + // the approximate ndv (num distinct value) should never be larger than the number of rows + val numNonNulls = if (col.nullable) Count(col) else Count(one) + val ndv = Least(Seq(HyperLogLogPlusPlus(col, relativeSD), numNonNulls)) + val numNulls = Subtract(Count(one), numNonNulls) + + def fixedLenTypeStruct(castType: DataType) = { + // For fixed width types, avg size should be the same as max size. 
+ val avgSize = Literal(col.dataType.defaultSize, LongType) + struct(ndv, Cast(Min(col), castType), Cast(Max(col), castType), numNulls, avgSize, avgSize) + } + + col.dataType match { + case _: IntegralType => fixedLenTypeStruct(LongType) + case _: DecimalType => fixedLenTypeStruct(col.dataType) + case DoubleType | FloatType => fixedLenTypeStruct(DoubleType) + case BooleanType => fixedLenTypeStruct(col.dataType) + case DateType => fixedLenTypeStruct(col.dataType) + case TimestampType => fixedLenTypeStruct(col.dataType) + case BinaryType | StringType => + // For string and binary type, we don't store min/max. + val nullLit = Literal(null, col.dataType) + struct( + ndv, nullLit, nullLit, numNulls, + Ceil(Average(Length(col))), Cast(Max(Length(col)), LongType)) + case _ => + throw new AnalysisException("Analyzing column statistics is not supported for column " + + s"${col.name} of data type: ${col.dataType}.") + } + } + + /** Convert a struct for column stats (defined in statExprs) into [[ColumnStat]]. */ + def rowToColumnStat(row: Row): ColumnStat = { + ColumnStat( + distinctCount = BigInt(row.getLong(0)), + min = Option(row.get(1)), // for string/binary min/max, get should return null + max = Option(row.get(2)), + nullCount = BigInt(row.getLong(3)), + avgLen = row.getLong(4), + maxLen = row.getLong(5) + ) + } -case class BooleanColumnStat(statRow: InternalRow) { - // The indices here must be consistent with `ColumnStatStruct.booleanColumnStat`. - val numNulls: Long = statRow.getLong(0) - val numTrues: Long = statRow.getLong(1) - val numFalses: Long = statRow.getLong(2) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 7fc57d09e9243..9dffe3614a87c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -24,9 +24,8 @@ import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, ColumnStat, LogicalPlan, Statistics} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.types._ /** @@ -62,7 +61,7 @@ case class AnalyzeColumnCommand( // Compute stats for each column val (rowCount, newColStats) = - AnalyzeColumnCommand.computeColStats(sparkSession, relation, columnNames) + AnalyzeColumnCommand.computeColumnStats(sparkSession, tableIdent.table, relation, columnNames) // We also update table-level stats in order to keep them consistent with column-level stats. val statistics = Statistics( @@ -88,8 +87,9 @@ object AnalyzeColumnCommand extends Logging { * * This is visible for testing. 
*/ - def computeColStats( + def computeColumnStats( sparkSession: SparkSession, + tableName: String, relation: LogicalPlan, columnNames: Seq[String]): (Long, Map[String, ColumnStat]) = { @@ -97,102 +97,33 @@ object AnalyzeColumnCommand extends Logging { val resolver = sparkSession.sessionState.conf.resolver val attributesToAnalyze = AttributeSet(columnNames.map { col => val exprOption = relation.output.find(attr => resolver(attr.name, col)) - exprOption.getOrElse(throw new AnalysisException(s"Invalid column name: $col.")) + exprOption.getOrElse(throw new AnalysisException(s"Column $col does not exist.")) }).toSeq + // Make sure the column types are supported for stats gathering. + attributesToAnalyze.foreach { attr => + if (!ColumnStat.supportsType(attr.dataType)) { + throw new AnalysisException( + s"Column ${attr.name} in table $tableName is of type ${attr.dataType}, " + + "and Spark does not support statistics collection on this column type.") + } + } + // Collect statistics per column. // The first element in the result will be the overall row count, the following elements // will be structs containing all column stats. // The layout of each struct follows the layout of the ColumnStats. val ndvMaxErr = sparkSession.sessionState.conf.ndvMaxError val expressions = Count(Literal(1)).toAggregateExpression() +: - attributesToAnalyze.map(AnalyzeColumnCommand.createColumnStatStruct(_, ndvMaxErr)) + attributesToAnalyze.map(ColumnStat.statExprs(_, ndvMaxErr)) + val namedExpressions = expressions.map(e => Alias(e, e.toString)()) - val statsRow = Dataset.ofRows(sparkSession, Aggregate(Nil, namedExpressions, relation)) - .queryExecution.toRdd.collect().head + val statsRow = Dataset.ofRows(sparkSession, Aggregate(Nil, namedExpressions, relation)).head() - // unwrap the result - // TODO: Get rid of numFields by using the public Dataset API. val rowCount = statsRow.getLong(0) val columnStats = attributesToAnalyze.zipWithIndex.map { case (expr, i) => - val numFields = AnalyzeColumnCommand.numStatFields(expr.dataType) - (expr.name, ColumnStat(statsRow.getStruct(i + 1, numFields))) + (expr.name, ColumnStat.rowToColumnStat(statsRow.getStruct(i + 1))) }.toMap (rowCount, columnStats) } - - private val zero = Literal(0, LongType) - private val one = Literal(1, LongType) - - private def numNulls(e: Expression): Expression = { - if (e.nullable) Sum(If(IsNull(e), one, zero)) else zero - } - private def max(e: Expression): Expression = Max(e) - private def min(e: Expression): Expression = Min(e) - private def ndv(e: Expression, relativeSD: Double): Expression = { - // the approximate ndv should never be larger than the number of rows - Least(Seq(HyperLogLogPlusPlus(e, relativeSD), Count(one))) - } - private def avgLength(e: Expression): Expression = Average(Length(e)) - private def maxLength(e: Expression): Expression = Max(Length(e)) - private def numTrues(e: Expression): Expression = Sum(If(e, one, zero)) - private def numFalses(e: Expression): Expression = Sum(If(Not(e), one, zero)) - - /** - * Creates a struct that groups the sequence of expressions together. This is used to create - * one top level struct per column. 
- */ - private def createStruct(exprs: Seq[Expression]): CreateNamedStruct = { - CreateStruct(exprs.map { expr: Expression => - expr.transformUp { - case af: AggregateFunction => af.toAggregateExpression() - } - }) - } - - private def numericColumnStat(e: Expression, relativeSD: Double): Seq[Expression] = { - Seq(numNulls(e), max(e), min(e), ndv(e, relativeSD)) - } - - private def stringColumnStat(e: Expression, relativeSD: Double): Seq[Expression] = { - Seq(numNulls(e), avgLength(e), maxLength(e), ndv(e, relativeSD)) - } - - private def binaryColumnStat(e: Expression): Seq[Expression] = { - Seq(numNulls(e), avgLength(e), maxLength(e)) - } - - private def booleanColumnStat(e: Expression): Seq[Expression] = { - Seq(numNulls(e), numTrues(e), numFalses(e)) - } - - // TODO(rxin): Get rid of this function. - def numStatFields(dataType: DataType): Int = { - dataType match { - case BinaryType | BooleanType => 3 - case _ => 4 - } - } - - /** - * Creates a struct expression that contains the statistics to collect for a column. - * - * @param attr column to collect statistics - * @param relativeSD relative error for approximate number of distinct values. - */ - def createColumnStatStruct(attr: Attribute, relativeSD: Double): CreateNamedStruct = { - attr.dataType match { - case _: NumericType | TimestampType | DateType => - createStruct(numericColumnStat(attr, relativeSD)) - case StringType => - createStruct(stringColumnStat(attr, relativeSD)) - case BinaryType => - createStruct(binaryColumnStat(attr)) - case BooleanType => - createStruct(booleanColumnStat(attr)) - case otherType => - throw new AnalysisException("Analyzing columns is not supported for column " + - s"${attr.name} of data type: ${attr.dataType}.") - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala new file mode 100644 index 0000000000000..1fcccd061079e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.{lang => jl} +import java.sql.{Date, Timestamp} + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} +import org.apache.spark.sql.test.SQLTestData.ArrayData +import org.apache.spark.sql.types._ + + +/** + * End-to-end suite testing statistics collection and use on both entire table and columns. 
+ */ +class StatisticsCollectionSuite extends StatisticsCollectionTestBase with SharedSQLContext { + import testImplicits._ + + private def checkTableStats(tableName: String, expectedRowCount: Option[Int]) + : Option[Statistics] = { + val df = spark.table(tableName) + val stats = df.queryExecution.analyzed.collect { case rel: LogicalRelation => + assert(rel.catalogTable.get.stats.flatMap(_.rowCount) === expectedRowCount) + rel.catalogTable.get.stats + } + assert(stats.size == 1) + stats.head + } + + test("estimates the size of a limit 0 on outer join") { + withTempView("test") { + Seq(("one", 1), ("two", 2), ("three", 3), ("four", 4)).toDF("k", "v") + .createOrReplaceTempView("test") + val df1 = spark.table("test") + val df2 = spark.table("test").limit(0) + val df = df1.join(df2, Seq("k"), "left") + + val sizes = df.queryExecution.analyzed.collect { case g: Join => + g.statistics.sizeInBytes + } + + assert(sizes.size === 1, s"number of Join nodes is wrong:\n ${df.queryExecution}") + assert(sizes.head === BigInt(96), + s"expected exact size 96 for table 'test', got: ${sizes.head}") + } + } + + test("analyze column command - unsupported types and invalid columns") { + val tableName = "column_stats_test1" + withTable(tableName) { + Seq(ArrayData(Seq(1, 2, 3), Seq(Seq(1, 2, 3)))).toDF().write.saveAsTable(tableName) + + // Test unsupported data types + val err1 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS data") + } + assert(err1.message.contains("does not support statistics collection")) + + // Test invalid columns + val err2 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS some_random_column") + } + assert(err2.message.contains("does not exist")) + } + } + + test("test table-level statistics for data source table") { + val tableName = "tbl" + withTable(tableName) { + sql(s"CREATE TABLE $tableName(i INT, j STRING) USING parquet") + Seq(1 -> "a", 2 -> "b").toDF("i", "j").write.mode("overwrite").insertInto(tableName) + + // noscan won't count the number of rows + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") + checkTableStats(tableName, expectedRowCount = None) + + // without noscan, we count the number of rows + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS") + checkTableStats(tableName, expectedRowCount = Some(2)) + } + } + + test("SPARK-15392: DataFrame created from RDD should not be broadcasted") { + val rdd = sparkContext.range(1, 100).map(i => Row(i, i)) + val df = spark.createDataFrame(rdd, new StructType().add("a", LongType).add("b", LongType)) + assert(df.queryExecution.analyzed.statistics.sizeInBytes > + spark.sessionState.conf.autoBroadcastJoinThreshold) + assert(df.selectExpr("a").queryExecution.analyzed.statistics.sizeInBytes > + spark.sessionState.conf.autoBroadcastJoinThreshold) + } + + test("estimates the size of limit") { + withTempView("test") { + Seq(("one", 1), ("two", 2), ("three", 3), ("four", 4)).toDF("k", "v") + .createOrReplaceTempView("test") + Seq((0, 1), (1, 24), (2, 48)).foreach { case (limit, expected) => + val df = sql(s"""SELECT * FROM test limit $limit""") + + val sizesGlobalLimit = df.queryExecution.analyzed.collect { case g: GlobalLimit => + g.statistics.sizeInBytes + } + assert(sizesGlobalLimit.size === 1, s"Size wrong for:\n ${df.queryExecution}") + assert(sizesGlobalLimit.head === BigInt(expected), + s"expected exact size $expected for table 'test', got: ${sizesGlobalLimit.head}") + + val sizesLocalLimit = df.queryExecution.analyzed.collect { 
case l: LocalLimit => + l.statistics.sizeInBytes + } + assert(sizesLocalLimit.size === 1, s"Size wrong for:\n ${df.queryExecution}") + assert(sizesLocalLimit.head === BigInt(expected), + s"expected exact size $expected for table 'test', got: ${sizesLocalLimit.head}") + } + } + } + +} + + +/** + * The base for test cases that we want to include in both the hive module (for verifying behavior + * when using the Hive external catalog) as well as in the sql/core module. + */ +abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils { + import testImplicits._ + + private val dec1 = new java.math.BigDecimal("1.000000000000000000") + private val dec2 = new java.math.BigDecimal("8.000000000000000000") + private val d1 = Date.valueOf("2016-05-08") + private val d2 = Date.valueOf("2016-05-09") + private val t1 = Timestamp.valueOf("2016-05-08 00:00:01") + private val t2 = Timestamp.valueOf("2016-05-09 00:00:02") + + /** + * Define a very simple 3 row table used for testing column serialization. + * Note: last column is seq[int] which doesn't support stats collection. + */ + protected val data = Seq[ + (jl.Boolean, jl.Byte, jl.Short, jl.Integer, jl.Long, + jl.Double, jl.Float, java.math.BigDecimal, + String, Array[Byte], Date, Timestamp, + Seq[Int])]( + (false, 1.toByte, 1.toShort, 1, 1L, 1.0, 1.0f, dec1, "s1", "b1".getBytes, d1, t1, null), + (true, 2.toByte, 3.toShort, 4, 5L, 6.0, 7.0f, dec2, "ss9", "bb0".getBytes, d2, t2, null), + (null, null, null, null, null, null, null, null, null, null, null, null, null) + ) + + /** A mapping from column to the stats collected. */ + protected val stats = mutable.LinkedHashMap( + "cbool" -> ColumnStat(2, Some(false), Some(true), 1, 1, 1), + "cbyte" -> ColumnStat(2, Some(1L), Some(2L), 1, 1, 1), + "cshort" -> ColumnStat(2, Some(1L), Some(3L), 1, 2, 2), + "cint" -> ColumnStat(2, Some(1L), Some(4L), 1, 4, 4), + "clong" -> ColumnStat(2, Some(1L), Some(5L), 1, 8, 8), + "cdouble" -> ColumnStat(2, Some(1.0), Some(6.0), 1, 8, 8), + "cfloat" -> ColumnStat(2, Some(1.0), Some(7.0), 1, 4, 4), + "cdecimal" -> ColumnStat(2, Some(dec1), Some(dec2), 1, 16, 16), + "cstring" -> ColumnStat(2, None, None, 1, 3, 3), + "cbinary" -> ColumnStat(2, None, None, 1, 3, 3), + "cdate" -> ColumnStat(2, Some(d1), Some(d2), 1, 4, 4), + "ctimestamp" -> ColumnStat(2, Some(t1), Some(t2), 1, 8, 8) + ) + + test("column stats round trip serialization") { + // Make sure we serialize and then deserialize and we will get the result data + val df = data.toDF(stats.keys.toSeq :+ "carray" : _*) + stats.zip(df.schema).foreach { case ((k, v), field) => + withClue(s"column $k with type ${field.dataType}") { + val roundtrip = ColumnStat.fromMap("table_is_foo", field, v.toMap) + assert(roundtrip == Some(v)) + } + } + } + + test("analyze column command - result verification") { + val tableName = "column_stats_test2" + // (data.head.productArity - 1) because the last column does not support stats collection. 
+ assert(stats.size == data.head.productArity - 1) + val df = data.toDF(stats.keys.toSeq :+ "carray" : _*) + + withTable(tableName) { + df.write.saveAsTable(tableName) + + // Collect statistics + sql(s"analyze table $tableName compute STATISTICS FOR COLUMNS " + stats.keys.mkString(", ")) + + // Validate statistics + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + assert(table.stats.isDefined) + assert(table.stats.get.colStats.size == stats.size) + + stats.foreach { case (k, v) => + withClue(s"column $k") { + assert(table.stats.get.colStats(k) == v) + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala deleted file mode 100644 index e866ac2cb3b34..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsColumnSuite.scala +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import java.sql.{Date, Timestamp} - -import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.catalyst.plans.logical.ColumnStat -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.command.AnalyzeColumnCommand -import org.apache.spark.sql.test.SQLTestData.ArrayData -import org.apache.spark.sql.types._ - -class StatisticsColumnSuite extends StatisticsTest { - import testImplicits._ - - test("parse analyze column commands") { - val tableName = "tbl" - - // we need to specify column names - intercept[ParseException] { - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS") - } - - val analyzeSql = s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS key, value" - val parsed = spark.sessionState.sqlParser.parsePlan(analyzeSql) - val expected = AnalyzeColumnCommand(TableIdentifier(tableName), Seq("key", "value")) - comparePlans(parsed, expected) - } - - test("analyzing columns of non-atomic types is not supported") { - val tableName = "tbl" - withTable(tableName) { - Seq(ArrayData(Seq(1, 2, 3), Seq(Seq(1, 2, 3)))).toDF().write.saveAsTable(tableName) - val err = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS data") - } - assert(err.message.contains("Analyzing columns is not supported")) - } - } - - test("check correctness of columns") { - val table = "tbl" - val colName1 = "abc" - val colName2 = "x.yz" - withTable(table) { - sql(s"CREATE TABLE $table ($colName1 int, `$colName2` string) USING PARQUET") - - val invalidColError = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS key") - } - 
assert(invalidColError.message == "Invalid column name: key.") - - withSQLConf("spark.sql.caseSensitive" -> "true") { - val invalidErr = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS ${colName1.toUpperCase}") - } - assert(invalidErr.message == s"Invalid column name: ${colName1.toUpperCase}.") - } - - withSQLConf("spark.sql.caseSensitive" -> "false") { - val columnsToAnalyze = Seq(colName2.toUpperCase, colName1, colName2) - val tableIdent = TableIdentifier(table, Some("default")) - val relation = spark.sessionState.catalog.lookupRelation(tableIdent) - val (_, columnStats) = - AnalyzeColumnCommand.computeColStats(spark, relation, columnsToAnalyze) - assert(columnStats.contains(colName1)) - assert(columnStats.contains(colName2)) - // check deduplication - assert(columnStats.size == 2) - assert(!columnStats.contains(colName2.toUpperCase)) - } - } - } - - private def getNonNullValues[T](values: Seq[Option[T]]): Seq[T] = { - values.filter(_.isDefined).map(_.get) - } - - test("column-level statistics for integral type columns") { - val values = (0 to 5).map { i => - if (i % 2 == 0) None else Some(i) - } - val data = values.map { i => - (i.map(_.toByte), i.map(_.toShort), i.map(_.toInt), i.map(_.toLong)) - } - - val df = data.toDF("c1", "c2", "c3", "c4") - val nonNullValues = getNonNullValues[Int](values) - val expectedColStatsSeq = df.schema.map { f => - val colStat = ColumnStat(InternalRow( - values.count(_.isEmpty).toLong, - nonNullValues.max, - nonNullValues.min, - nonNullValues.distinct.length.toLong)) - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for fractional type columns") { - val values: Seq[Option[Decimal]] = (0 to 5).map { i => - if (i == 0) None else Some(Decimal(i + i * 0.01)) - } - val data = values.map { i => - (i.map(_.toFloat), i.map(_.toDouble), i) - } - - val df = data.toDF("c1", "c2", "c3") - val nonNullValues = getNonNullValues[Decimal](values) - val numNulls = values.count(_.isEmpty).toLong - val ndv = nonNullValues.distinct.length.toLong - val expectedColStatsSeq = df.schema.map { f => - val colStat = f.dataType match { - case floatType: FloatType => - ColumnStat(InternalRow(numNulls, nonNullValues.max.toFloat, nonNullValues.min.toFloat, - ndv)) - case doubleType: DoubleType => - ColumnStat(InternalRow(numNulls, nonNullValues.max.toDouble, nonNullValues.min.toDouble, - ndv)) - case decimalType: DecimalType => - ColumnStat(InternalRow(numNulls, nonNullValues.max, nonNullValues.min, ndv)) - } - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for string column") { - val values = Seq(None, Some("a"), Some("bbbb"), Some("cccc"), Some("")) - val df = values.toDF("c1") - val nonNullValues = getNonNullValues[String](values) - val expectedColStatsSeq = df.schema.map { f => - val colStat = ColumnStat(InternalRow( - values.count(_.isEmpty).toLong, - nonNullValues.map(_.length).sum / nonNullValues.length.toDouble, - nonNullValues.map(_.length).max.toInt, - nonNullValues.distinct.length.toLong)) - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for binary column") { - val values = Seq(None, Some("a"), Some("bbbb"), Some("cccc"), Some("")).map(_.map(_.getBytes)) - val df = values.toDF("c1") - val nonNullValues = getNonNullValues[Array[Byte]](values) - val expectedColStatsSeq = df.schema.map { f => - val colStat = ColumnStat(InternalRow( - values.count(_.isEmpty).toLong, - 
nonNullValues.map(_.length).sum / nonNullValues.length.toDouble, - nonNullValues.map(_.length).max.toInt)) - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for boolean column") { - val values = Seq(None, Some(true), Some(false), Some(true)) - val df = values.toDF("c1") - val nonNullValues = getNonNullValues[Boolean](values) - val expectedColStatsSeq = df.schema.map { f => - val colStat = ColumnStat(InternalRow( - values.count(_.isEmpty).toLong, - nonNullValues.count(_.equals(true)).toLong, - nonNullValues.count(_.equals(false)).toLong)) - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for date column") { - val values = Seq(None, Some("1970-01-01"), Some("1970-02-02")).map(_.map(Date.valueOf)) - val df = values.toDF("c1") - val nonNullValues = getNonNullValues[Date](values) - val expectedColStatsSeq = df.schema.map { f => - val colStat = ColumnStat(InternalRow( - values.count(_.isEmpty).toLong, - // Internally, DateType is represented as the number of days from 1970-01-01. - nonNullValues.map(DateTimeUtils.fromJavaDate).max, - nonNullValues.map(DateTimeUtils.fromJavaDate).min, - nonNullValues.distinct.length.toLong)) - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for timestamp column") { - val values = Seq(None, Some("1970-01-01 00:00:00"), Some("1970-01-01 00:00:05")).map { i => - i.map(Timestamp.valueOf) - } - val df = values.toDF("c1") - val nonNullValues = getNonNullValues[Timestamp](values) - val expectedColStatsSeq = df.schema.map { f => - val colStat = ColumnStat(InternalRow( - values.count(_.isEmpty).toLong, - // Internally, TimestampType is represented as the number of days from 1970-01-01 - nonNullValues.map(DateTimeUtils.fromJavaTimestamp).max, - nonNullValues.map(DateTimeUtils.fromJavaTimestamp).min, - nonNullValues.distinct.length.toLong)) - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for null columns") { - val values = Seq(None, None) - val data = values.map { i => - (i.map(_.toString), i.map(_.toString.toInt)) - } - val df = data.toDF("c1", "c2") - val expectedColStatsSeq = df.schema.map { f => - (f, ColumnStat(InternalRow(values.count(_.isEmpty).toLong, null, null, 0L))) - } - checkColStats(df, expectedColStatsSeq) - } - - test("column-level statistics for columns with different types") { - val intSeq = Seq(1, 2) - val doubleSeq = Seq(1.01d, 2.02d) - val stringSeq = Seq("a", "bb") - val binarySeq = Seq("a", "bb").map(_.getBytes) - val booleanSeq = Seq(true, false) - val dateSeq = Seq("1970-01-01", "1970-02-02").map(Date.valueOf) - val timestampSeq = Seq("1970-01-01 00:00:00", "1970-01-01 00:00:05").map(Timestamp.valueOf) - val longSeq = Seq(5L, 4L) - - val data = intSeq.indices.map { i => - (intSeq(i), doubleSeq(i), stringSeq(i), binarySeq(i), booleanSeq(i), dateSeq(i), - timestampSeq(i), longSeq(i)) - } - val df = data.toDF("c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8") - val expectedColStatsSeq = df.schema.map { f => - val colStat = f.dataType match { - case IntegerType => - ColumnStat(InternalRow(0L, intSeq.max, intSeq.min, intSeq.distinct.length.toLong)) - case DoubleType => - ColumnStat(InternalRow(0L, doubleSeq.max, doubleSeq.min, - doubleSeq.distinct.length.toLong)) - case StringType => - ColumnStat(InternalRow(0L, stringSeq.map(_.length).sum / stringSeq.length.toDouble, - stringSeq.map(_.length).max.toInt, stringSeq.distinct.length.toLong)) - case BinaryType => - 
ColumnStat(InternalRow(0L, binarySeq.map(_.length).sum / binarySeq.length.toDouble, - binarySeq.map(_.length).max.toInt)) - case BooleanType => - ColumnStat(InternalRow(0L, booleanSeq.count(_.equals(true)).toLong, - booleanSeq.count(_.equals(false)).toLong)) - case DateType => - ColumnStat(InternalRow(0L, dateSeq.map(DateTimeUtils.fromJavaDate).max, - dateSeq.map(DateTimeUtils.fromJavaDate).min, dateSeq.distinct.length.toLong)) - case TimestampType => - ColumnStat(InternalRow(0L, timestampSeq.map(DateTimeUtils.fromJavaTimestamp).max, - timestampSeq.map(DateTimeUtils.fromJavaTimestamp).min, - timestampSeq.distinct.length.toLong)) - case LongType => - ColumnStat(InternalRow(0L, longSeq.max, longSeq.min, longSeq.distinct.length.toLong)) - } - (f, colStat) - } - checkColStats(df, expectedColStatsSeq) - } - - test("update table-level stats while collecting column-level stats") { - val table = "tbl" - withTable(table) { - sql(s"CREATE TABLE $table (c1 int) USING PARQUET") - sql(s"INSERT INTO $table SELECT 1") - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS") - checkTableStats(tableName = table, expectedRowCount = Some(1)) - - // update table-level stats between analyze table and analyze column commands - sql(s"INSERT INTO $table SELECT 1") - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS c1") - val fetchedStats = checkTableStats(tableName = table, expectedRowCount = Some(2)) - - val colStat = fetchedStats.get.colStats("c1") - StatisticsTest.checkColStat( - dataType = IntegerType, - colStat = colStat, - expectedColStat = ColumnStat(InternalRow(0L, 1, 1, 1L)), - rsd = spark.sessionState.conf.ndvMaxError) - } - } - - test("analyze column stats independently") { - val table = "tbl" - withTable(table) { - sql(s"CREATE TABLE $table (c1 int, c2 long) USING PARQUET") - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS c1") - val fetchedStats1 = checkTableStats(tableName = table, expectedRowCount = Some(0)) - assert(fetchedStats1.get.colStats.size == 1) - val expected1 = ColumnStat(InternalRow(0L, null, null, 0L)) - val rsd = spark.sessionState.conf.ndvMaxError - StatisticsTest.checkColStat( - dataType = IntegerType, - colStat = fetchedStats1.get.colStats("c1"), - expectedColStat = expected1, - rsd = rsd) - - sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS c2") - val fetchedStats2 = checkTableStats(tableName = table, expectedRowCount = Some(0)) - // column c1 is kept in the stats - assert(fetchedStats2.get.colStats.size == 2) - StatisticsTest.checkColStat( - dataType = IntegerType, - colStat = fetchedStats2.get.colStats("c1"), - expectedColStat = expected1, - rsd = rsd) - val expected2 = ColumnStat(InternalRow(0L, null, null, 0L)) - StatisticsTest.checkColStat( - dataType = LongType, - colStat = fetchedStats2.get.colStats("c2"), - expectedColStat = expected2, - rsd = rsd) - } - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsSuite.scala deleted file mode 100644 index 8cf42e9248c2a..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsSuite.scala +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.plans.logical.{GlobalLimit, Join, LocalLimit} -import org.apache.spark.sql.types._ - -class StatisticsSuite extends StatisticsTest { - import testImplicits._ - - test("SPARK-15392: DataFrame created from RDD should not be broadcasted") { - val rdd = sparkContext.range(1, 100).map(i => Row(i, i)) - val df = spark.createDataFrame(rdd, new StructType().add("a", LongType).add("b", LongType)) - assert(df.queryExecution.analyzed.statistics.sizeInBytes > - spark.sessionState.conf.autoBroadcastJoinThreshold) - assert(df.selectExpr("a").queryExecution.analyzed.statistics.sizeInBytes > - spark.sessionState.conf.autoBroadcastJoinThreshold) - } - - test("estimates the size of limit") { - withTempView("test") { - Seq(("one", 1), ("two", 2), ("three", 3), ("four", 4)).toDF("k", "v") - .createOrReplaceTempView("test") - Seq((0, 1), (1, 24), (2, 48)).foreach { case (limit, expected) => - val df = sql(s"""SELECT * FROM test limit $limit""") - - val sizesGlobalLimit = df.queryExecution.analyzed.collect { case g: GlobalLimit => - g.statistics.sizeInBytes - } - assert(sizesGlobalLimit.size === 1, s"Size wrong for:\n ${df.queryExecution}") - assert(sizesGlobalLimit.head === BigInt(expected), - s"expected exact size $expected for table 'test', got: ${sizesGlobalLimit.head}") - - val sizesLocalLimit = df.queryExecution.analyzed.collect { case l: LocalLimit => - l.statistics.sizeInBytes - } - assert(sizesLocalLimit.size === 1, s"Size wrong for:\n ${df.queryExecution}") - assert(sizesLocalLimit.head === BigInt(expected), - s"expected exact size $expected for table 'test', got: ${sizesLocalLimit.head}") - } - } - } - - test("estimates the size of a limit 0 on outer join") { - withTempView("test") { - Seq(("one", 1), ("two", 2), ("three", 3), ("four", 4)).toDF("k", "v") - .createOrReplaceTempView("test") - val df1 = spark.table("test") - val df2 = spark.table("test").limit(0) - val df = df1.join(df2, Seq("k"), "left") - - val sizes = df.queryExecution.analyzed.collect { case g: Join => - g.statistics.sizeInBytes - } - - assert(sizes.size === 1, s"number of Join nodes is wrong:\n ${df.queryExecution}") - assert(sizes.head === BigInt(96), - s"expected exact size 96 for table 'test', got: ${sizes.head}") - } - } - - test("test table-level statistics for data source table created in InMemoryCatalog") { - val tableName = "tbl" - withTable(tableName) { - sql(s"CREATE TABLE $tableName(i INT, j STRING) USING parquet") - Seq(1 -> "a", 2 -> "b").toDF("i", "j").write.mode("overwrite").insertInto(tableName) - - // noscan won't count the number of rows - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - checkTableStats(tableName, expectedRowCount = None) - - // without noscan, we count the number of rows - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS") - checkTableStats(tableName, expectedRowCount = Some(2)) - } - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala deleted file mode 100644 index 915ee0d31bca2..0000000000000 
--- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsTest.scala +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} -import org.apache.spark.sql.execution.command.AnalyzeColumnCommand -import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types._ - - -trait StatisticsTest extends QueryTest with SharedSQLContext { - - def checkColStats( - df: DataFrame, - expectedColStatsSeq: Seq[(StructField, ColumnStat)]): Unit = { - val table = "tbl" - withTable(table) { - df.write.format("json").saveAsTable(table) - val columns = expectedColStatsSeq.map(_._1) - val tableIdent = TableIdentifier(table, Some("default")) - val relation = spark.sessionState.catalog.lookupRelation(tableIdent) - val (_, columnStats) = - AnalyzeColumnCommand.computeColStats(spark, relation, columns.map(_.name)) - expectedColStatsSeq.foreach { case (field, expectedColStat) => - assert(columnStats.contains(field.name)) - val colStat = columnStats(field.name) - StatisticsTest.checkColStat( - dataType = field.dataType, - colStat = colStat, - expectedColStat = expectedColStat, - rsd = spark.sessionState.conf.ndvMaxError) - - // check if we get the same colStat after encoding and decoding - val encodedCS = colStat.toString - val numFields = AnalyzeColumnCommand.numStatFields(field.dataType) - val decodedCS = ColumnStat(numFields, encodedCS) - StatisticsTest.checkColStat( - dataType = field.dataType, - colStat = decodedCS, - expectedColStat = expectedColStat, - rsd = spark.sessionState.conf.ndvMaxError) - } - } - } - - def checkTableStats(tableName: String, expectedRowCount: Option[Int]): Option[Statistics] = { - val df = spark.table(tableName) - val stats = df.queryExecution.analyzed.collect { case rel: LogicalRelation => - assert(rel.catalogTable.get.stats.flatMap(_.rowCount) === expectedRowCount) - rel.catalogTable.get.stats - } - assert(stats.size == 1) - stats.head - } -} - -object StatisticsTest { - def checkColStat( - dataType: DataType, - colStat: ColumnStat, - expectedColStat: ColumnStat, - rsd: Double): Unit = { - dataType match { - case StringType => - val cs = colStat.forString - val expectedCS = expectedColStat.forString - assert(cs.numNulls == expectedCS.numNulls) - assert(cs.avgColLen == expectedCS.avgColLen) - assert(cs.maxColLen == expectedCS.maxColLen) - checkNdv(ndv = cs.ndv, expectedNdv = expectedCS.ndv, rsd = rsd) - case BinaryType => - val cs = colStat.forBinary - val expectedCS = expectedColStat.forBinary - assert(cs.numNulls == expectedCS.numNulls) - assert(cs.avgColLen == 
expectedCS.avgColLen) - assert(cs.maxColLen == expectedCS.maxColLen) - case BooleanType => - val cs = colStat.forBoolean - val expectedCS = expectedColStat.forBoolean - assert(cs.numNulls == expectedCS.numNulls) - assert(cs.numTrues == expectedCS.numTrues) - assert(cs.numFalses == expectedCS.numFalses) - case atomicType: AtomicType => - checkNumericColStats( - dataType = atomicType, colStat = colStat, expectedColStat = expectedColStat, rsd = rsd) - } - } - - private def checkNumericColStats( - dataType: AtomicType, - colStat: ColumnStat, - expectedColStat: ColumnStat, - rsd: Double): Unit = { - val cs = colStat.forNumeric(dataType) - val expectedCS = expectedColStat.forNumeric(dataType) - assert(cs.numNulls == expectedCS.numNulls) - assert(cs.max == expectedCS.max) - assert(cs.min == expectedCS.min) - checkNdv(ndv = cs.ndv, expectedNdv = expectedCS.ndv, rsd = rsd) - } - - private def checkNdv(ndv: Long, expectedNdv: Long, rsd: Double): Unit = { - // ndv is an approximate value, so we make sure we have the value, and it should be - // within 3*SD's of the given rsd. - if (expectedNdv == 0) { - assert(ndv == 0) - } else if (expectedNdv > 0) { - assert(ndv > 0) - val error = math.abs((ndv / expectedNdv.toDouble) - 1.0d) - assert(error <= rsd * 3.0d, "Error should be within 3 std. errors.") - } - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 797fe9ffa8be1..b070138be05d5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -23,9 +23,8 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.execution.command.{AnalyzeTableCommand, DescribeFunctionCommand, - DescribeTableCommand, ShowFunctionsCommand} -import org.apache.spark.sql.execution.datasources.{CreateTable, CreateTempViewUsing} +import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} @@ -221,12 +220,22 @@ class SparkSqlParserSuite extends PlanTest { intercept("explain describe tables x", "Unsupported SQL statement") } - test("SPARK-18106 analyze table") { + test("analyze table statistics") { assertEqual("analyze table t compute statistics", AnalyzeTableCommand(TableIdentifier("t"), noscan = false)) assertEqual("analyze table t compute statistics noscan", AnalyzeTableCommand(TableIdentifier("t"), noscan = true)) - assertEqual("analyze table t partition (a) compute statistics noscan", + assertEqual("analyze table t partition (a) compute statistics nOscAn", + AnalyzeTableCommand(TableIdentifier("t"), noscan = true)) + + // Partitions specified - we currently parse them but don't do anything with it + assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS", + AnalyzeTableCommand(TableIdentifier("t"), noscan = false)) + assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan", + AnalyzeTableCommand(TableIdentifier("t"), noscan = true)) + assertEqual("ANALYZE TABLE t PARTITION(ds, hr) COMPUTE 
STATISTICS", + AnalyzeTableCommand(TableIdentifier("t"), noscan = false)) + assertEqual("ANALYZE TABLE t PARTITION(ds, hr) COMPUTE STATISTICS noscan", AnalyzeTableCommand(TableIdentifier("t"), noscan = true)) intercept("analyze table t compute statistics xxxx", @@ -234,4 +243,11 @@ class SparkSqlParserSuite extends PlanTest { intercept("analyze table t partition (a) compute statistics xxxx", "Expected `NOSCAN` instead of `xxxx`") } + + test("analyze table column statistics") { + intercept("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS", "") + + assertEqual("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS key, value", + AnalyzeColumnCommand(TableIdentifier("t"), Seq("key", "value"))) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index ff0923f04893d..fd9dc32063872 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.execution.command.{AnalyzeColumnCommand, DDLUtils} +import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.internal.StaticSQLConf._ @@ -514,7 +514,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat statsProperties += STATISTICS_NUM_ROWS -> stats.rowCount.get.toString() } stats.colStats.foreach { case (colName, colStat) => - statsProperties += (STATISTICS_COL_STATS_PREFIX + colName) -> colStat.toString + colStat.toMap.foreach { case (k, v) => + statsProperties += (columnStatKeyPropName(colName, k) -> v) + } } tableDefinition.copy(properties = tableDefinition.properties ++ statsProperties) } else { @@ -605,48 +607,65 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat * It reads table schema, provider, partition column names and bucket specification from table * properties, and filter out these special entries from table properties. */ - private def restoreTableMetadata(table: CatalogTable): CatalogTable = { + private def restoreTableMetadata(inputTable: CatalogTable): CatalogTable = { if (conf.get(DEBUG_MODE)) { - return table + return inputTable } - val tableWithSchema = if (table.tableType == VIEW) { - table - } else { - getProviderFromTableProperties(table) match { + var table = inputTable + + if (table.tableType != VIEW) { + table.properties.get(DATASOURCE_PROVIDER) match { // No provider in table properties, which means this table is created by Spark prior to 2.1, // or is created at Hive side. case None => - table.copy(provider = Some(DDLUtils.HIVE_PROVIDER), tracksPartitionsInCatalog = true) + table = table.copy( + provider = Some(DDLUtils.HIVE_PROVIDER), tracksPartitionsInCatalog = true) // This is a Hive serde table created by Spark 2.1 or higher versions. - case Some(DDLUtils.HIVE_PROVIDER) => restoreHiveSerdeTable(table) + case Some(DDLUtils.HIVE_PROVIDER) => + table = restoreHiveSerdeTable(table) // This is a regular data source table. 
- case Some(provider) => restoreDataSourceTable(table, provider) + case Some(provider) => + table = restoreDataSourceTable(table, provider) } } // construct Spark's statistics from information in Hive metastore - val statsProps = tableWithSchema.properties.filterKeys(_.startsWith(STATISTICS_PREFIX)) - val tableWithStats = if (statsProps.nonEmpty) { - val colStatsProps = statsProps.filterKeys(_.startsWith(STATISTICS_COL_STATS_PREFIX)) - .map { case (k, v) => (k.drop(STATISTICS_COL_STATS_PREFIX.length), v) } - val colStats: Map[String, ColumnStat] = tableWithSchema.schema.collect { - case f if colStatsProps.contains(f.name) => - val numFields = AnalyzeColumnCommand.numStatFields(f.dataType) - (f.name, ColumnStat(numFields, colStatsProps(f.name))) - }.toMap - tableWithSchema.copy( + val statsProps = table.properties.filterKeys(_.startsWith(STATISTICS_PREFIX)) + + if (statsProps.nonEmpty) { + val colStats = new scala.collection.mutable.HashMap[String, ColumnStat] + + // For each column, recover its column stats. Note that this is currently a O(n^2) operation, + // but given the number of columns it usually not enormous, this is probably OK as a start. + // If we want to map this a linear operation, we'd need a stronger contract between the + // naming convention used for serialization. + table.schema.foreach { field => + if (statsProps.contains(columnStatKeyPropName(field.name, ColumnStat.KEY_VERSION))) { + // If "version" field is defined, then the column stat is defined. + val keyPrefix = columnStatKeyPropName(field.name, "") + val colStatMap = statsProps.filterKeys(_.startsWith(keyPrefix)).map { case (k, v) => + (k.drop(keyPrefix.length), v) + } + + ColumnStat.fromMap(table.identifier.table, field, colStatMap).foreach { + colStat => colStats += field.name -> colStat + } + } + } + + table = table.copy( stats = Some(Statistics( - sizeInBytes = BigInt(tableWithSchema.properties(STATISTICS_TOTAL_SIZE)), - rowCount = tableWithSchema.properties.get(STATISTICS_NUM_ROWS).map(BigInt(_)), - colStats = colStats))) - } else { - tableWithSchema + sizeInBytes = BigInt(table.properties(STATISTICS_TOTAL_SIZE)), + rowCount = table.properties.get(STATISTICS_NUM_ROWS).map(BigInt(_)), + colStats = colStats.toMap))) } - tableWithStats.copy(properties = getOriginalTableProperties(table)) + // Get the original table properties as defined by the user. + table.copy( + properties = table.properties.filterNot { case (key, _) => key.startsWith(SPARK_SQL_PREFIX) }) } private def restoreHiveSerdeTable(table: CatalogTable): CatalogTable = { @@ -1020,17 +1039,17 @@ object HiveExternalCatalog { val TABLE_PARTITION_PROVIDER_CATALOG = "catalog" val TABLE_PARTITION_PROVIDER_FILESYSTEM = "filesystem" - - def getProviderFromTableProperties(metadata: CatalogTable): Option[String] = { - metadata.properties.get(DATASOURCE_PROVIDER) - } - - def getOriginalTableProperties(metadata: CatalogTable): Map[String, String] = { - metadata.properties.filterNot { case (key, _) => key.startsWith(SPARK_SQL_PREFIX) } + /** + * Returns the fully qualified name used in table properties for a particular column stat. + * For example, for column "mycol", and "min" stat, this should return + * "spark.sql.statistics.colStats.mycol.min". + */ + private def columnStatKeyPropName(columnName: String, statKey: String): String = { + STATISTICS_COL_STATS_PREFIX + columnName + "." + statKey } // A persisted data source table always store its schema in the catalog. 
- def getSchemaFromTableProperties(metadata: CatalogTable): StructType = { + private def getSchemaFromTableProperties(metadata: CatalogTable): StructType = { val errorMessage = "Could not read schema from the hive metastore because it is corrupted." val props = metadata.properties val schema = props.get(DATASOURCE_SCHEMA) @@ -1078,11 +1097,11 @@ object HiveExternalCatalog { ) } - def getPartitionColumnsFromTableProperties(metadata: CatalogTable): Seq[String] = { + private def getPartitionColumnsFromTableProperties(metadata: CatalogTable): Seq[String] = { getColumnNamesByType(metadata.properties, "part", "partitioning columns") } - def getBucketSpecFromTableProperties(metadata: CatalogTable): Option[BucketSpec] = { + private def getBucketSpecFromTableProperties(metadata: CatalogTable): Option[BucketSpec] = { metadata.properties.get(DATASOURCE_SCHEMA_NUMBUCKETS).map { numBuckets => BucketSpec( numBuckets.toInt, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 4f5ebc3d838b9..5ae202fdc98da 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -22,56 +22,16 @@ import java.io.{File, PrintWriter} import scala.reflect.ClassTag import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Statistics} -import org.apache.spark.sql.execution.command.{AnalyzeTableCommand, DDLUtils} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.logical.Statistics +import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ -class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { - - test("parse analyze commands") { - def assertAnalyzeCommand(analyzeCommand: String, c: Class[_]) { - val parsed = spark.sessionState.sqlParser.parsePlan(analyzeCommand) - val operators = parsed.collect { - case a: AnalyzeTableCommand => a - case o => o - } - - assert(operators.size === 1) - if (operators(0).getClass() != c) { - fail( - s"""$analyzeCommand expected command: $c, but got ${operators(0)} - |parsed command: - |$parsed - """.stripMargin) - } - } - - assertAnalyzeCommand( - "ANALYZE TABLE Table1 COMPUTE STATISTICS", - classOf[AnalyzeTableCommand]) - assertAnalyzeCommand( - "ANALYZE TABLE Table1 PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS", - classOf[AnalyzeTableCommand]) - assertAnalyzeCommand( - "ANALYZE TABLE Table1 PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan", - classOf[AnalyzeTableCommand]) - assertAnalyzeCommand( - "ANALYZE TABLE Table1 PARTITION(ds, hr) COMPUTE STATISTICS", - classOf[AnalyzeTableCommand]) - assertAnalyzeCommand( - "ANALYZE TABLE Table1 PARTITION(ds, hr) COMPUTE STATISTICS noscan", - classOf[AnalyzeTableCommand]) - - assertAnalyzeCommand( - "ANALYZE TABLE Table1 COMPUTE STATISTICS nOscAn", - classOf[AnalyzeTableCommand]) - } +class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton { test("MetastoreRelations fallback to HDFS for size estimation") { val 
enableFallBackToHdfsForStats = spark.sessionState.conf.fallBackToHdfsForStatsEnabled @@ -310,6 +270,110 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils } } + test("verify serialized column stats after analyzing columns") { + import testImplicits._ + + val tableName = "column_stats_test2" + // (data.head.productArity - 1) because the last column does not support stats collection. + assert(stats.size == data.head.productArity - 1) + val df = data.toDF(stats.keys.toSeq :+ "carray" : _*) + + withTable(tableName) { + df.write.saveAsTable(tableName) + + // Collect statistics + sql(s"analyze table $tableName compute STATISTICS FOR COLUMNS " + stats.keys.mkString(", ")) + + // Validate statistics + val hiveClient = spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client + val table = hiveClient.getTable("default", tableName) + + val props = table.properties.filterKeys(_.startsWith("spark.sql.statistics.colStats")) + assert(props == Map( + "spark.sql.statistics.colStats.cbinary.avgLen" -> "3", + "spark.sql.statistics.colStats.cbinary.distinctCount" -> "2", + "spark.sql.statistics.colStats.cbinary.maxLen" -> "3", + "spark.sql.statistics.colStats.cbinary.nullCount" -> "1", + "spark.sql.statistics.colStats.cbinary.version" -> "1", + "spark.sql.statistics.colStats.cbool.avgLen" -> "1", + "spark.sql.statistics.colStats.cbool.distinctCount" -> "2", + "spark.sql.statistics.colStats.cbool.max" -> "true", + "spark.sql.statistics.colStats.cbool.maxLen" -> "1", + "spark.sql.statistics.colStats.cbool.min" -> "false", + "spark.sql.statistics.colStats.cbool.nullCount" -> "1", + "spark.sql.statistics.colStats.cbool.version" -> "1", + "spark.sql.statistics.colStats.cbyte.avgLen" -> "1", + "spark.sql.statistics.colStats.cbyte.distinctCount" -> "2", + "spark.sql.statistics.colStats.cbyte.max" -> "2", + "spark.sql.statistics.colStats.cbyte.maxLen" -> "1", + "spark.sql.statistics.colStats.cbyte.min" -> "1", + "spark.sql.statistics.colStats.cbyte.nullCount" -> "1", + "spark.sql.statistics.colStats.cbyte.version" -> "1", + "spark.sql.statistics.colStats.cdate.avgLen" -> "4", + "spark.sql.statistics.colStats.cdate.distinctCount" -> "2", + "spark.sql.statistics.colStats.cdate.max" -> "2016-05-09", + "spark.sql.statistics.colStats.cdate.maxLen" -> "4", + "spark.sql.statistics.colStats.cdate.min" -> "2016-05-08", + "spark.sql.statistics.colStats.cdate.nullCount" -> "1", + "spark.sql.statistics.colStats.cdate.version" -> "1", + "spark.sql.statistics.colStats.cdecimal.avgLen" -> "16", + "spark.sql.statistics.colStats.cdecimal.distinctCount" -> "2", + "spark.sql.statistics.colStats.cdecimal.max" -> "8.000000000000000000", + "spark.sql.statistics.colStats.cdecimal.maxLen" -> "16", + "spark.sql.statistics.colStats.cdecimal.min" -> "1.000000000000000000", + "spark.sql.statistics.colStats.cdecimal.nullCount" -> "1", + "spark.sql.statistics.colStats.cdecimal.version" -> "1", + "spark.sql.statistics.colStats.cdouble.avgLen" -> "8", + "spark.sql.statistics.colStats.cdouble.distinctCount" -> "2", + "spark.sql.statistics.colStats.cdouble.max" -> "6.0", + "spark.sql.statistics.colStats.cdouble.maxLen" -> "8", + "spark.sql.statistics.colStats.cdouble.min" -> "1.0", + "spark.sql.statistics.colStats.cdouble.nullCount" -> "1", + "spark.sql.statistics.colStats.cdouble.version" -> "1", + "spark.sql.statistics.colStats.cfloat.avgLen" -> "4", + "spark.sql.statistics.colStats.cfloat.distinctCount" -> "2", + "spark.sql.statistics.colStats.cfloat.max" -> "7.0", + 
"spark.sql.statistics.colStats.cfloat.maxLen" -> "4", + "spark.sql.statistics.colStats.cfloat.min" -> "1.0", + "spark.sql.statistics.colStats.cfloat.nullCount" -> "1", + "spark.sql.statistics.colStats.cfloat.version" -> "1", + "spark.sql.statistics.colStats.cint.avgLen" -> "4", + "spark.sql.statistics.colStats.cint.distinctCount" -> "2", + "spark.sql.statistics.colStats.cint.max" -> "4", + "spark.sql.statistics.colStats.cint.maxLen" -> "4", + "spark.sql.statistics.colStats.cint.min" -> "1", + "spark.sql.statistics.colStats.cint.nullCount" -> "1", + "spark.sql.statistics.colStats.cint.version" -> "1", + "spark.sql.statistics.colStats.clong.avgLen" -> "8", + "spark.sql.statistics.colStats.clong.distinctCount" -> "2", + "spark.sql.statistics.colStats.clong.max" -> "5", + "spark.sql.statistics.colStats.clong.maxLen" -> "8", + "spark.sql.statistics.colStats.clong.min" -> "1", + "spark.sql.statistics.colStats.clong.nullCount" -> "1", + "spark.sql.statistics.colStats.clong.version" -> "1", + "spark.sql.statistics.colStats.cshort.avgLen" -> "2", + "spark.sql.statistics.colStats.cshort.distinctCount" -> "2", + "spark.sql.statistics.colStats.cshort.max" -> "3", + "spark.sql.statistics.colStats.cshort.maxLen" -> "2", + "spark.sql.statistics.colStats.cshort.min" -> "1", + "spark.sql.statistics.colStats.cshort.nullCount" -> "1", + "spark.sql.statistics.colStats.cshort.version" -> "1", + "spark.sql.statistics.colStats.cstring.avgLen" -> "3", + "spark.sql.statistics.colStats.cstring.distinctCount" -> "2", + "spark.sql.statistics.colStats.cstring.maxLen" -> "3", + "spark.sql.statistics.colStats.cstring.nullCount" -> "1", + "spark.sql.statistics.colStats.cstring.version" -> "1", + "spark.sql.statistics.colStats.ctimestamp.avgLen" -> "8", + "spark.sql.statistics.colStats.ctimestamp.distinctCount" -> "2", + "spark.sql.statistics.colStats.ctimestamp.max" -> "2016-05-09 00:00:02.0", + "spark.sql.statistics.colStats.ctimestamp.maxLen" -> "8", + "spark.sql.statistics.colStats.ctimestamp.min" -> "2016-05-08 00:00:01.0", + "spark.sql.statistics.colStats.ctimestamp.nullCount" -> "1", + "spark.sql.statistics.colStats.ctimestamp.version" -> "1" + )) + } + } + private def testUpdatingTableStats(tableDescription: String, createTableCmd: String): Unit = { test("test table-level statistics for " + tableDescription) { val parquetTable = "parquetTable" @@ -319,7 +383,8 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils TableIdentifier(parquetTable)) assert(DDLUtils.isDatasourceTable(catalogTable)) - sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src") + // Add a filter to avoid creating too many partitions + sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src WHERE key < 10") checkTableStats( parquetTable, isDataSourceTable = true, hasSizeInBytes = false, expectedRowCounts = None) @@ -328,7 +393,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils val fetchedStats1 = checkTableStats( parquetTable, isDataSourceTable = true, hasSizeInBytes = true, expectedRowCounts = None) - sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src") + sql(s"INSERT INTO TABLE $parquetTable SELECT * FROM src WHERE key < 10") sql(s"ANALYZE TABLE $parquetTable COMPUTE STATISTICS noscan") val fetchedStats2 = checkTableStats( parquetTable, isDataSourceTable = true, hasSizeInBytes = true, expectedRowCounts = None) @@ -340,7 +405,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils parquetTable, isDataSourceTable = true, hasSizeInBytes = true, 
- expectedRowCounts = Some(1000)) + expectedRowCounts = Some(20)) assert(fetchedStats3.get.sizeInBytes == fetchedStats2.get.sizeInBytes) } } @@ -369,6 +434,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils } } + /** Used to test refreshing cached metadata once table stats are updated. */ private def getStatsBeforeAfterUpdate(isAnalyzeColumns: Boolean): (Statistics, Statistics) = { val tableName = "tbl" var statsBeforeUpdate: Statistics = null @@ -411,145 +477,6 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils assert(statsAfterUpdate.rowCount == Some(2)) } - test("test refreshing column stats of cached data source table by `ANALYZE TABLE` statement") { - val (statsBeforeUpdate, statsAfterUpdate) = getStatsBeforeAfterUpdate(isAnalyzeColumns = true) - - assert(statsBeforeUpdate.sizeInBytes > 0) - assert(statsBeforeUpdate.rowCount == Some(1)) - StatisticsTest.checkColStat( - dataType = IntegerType, - colStat = statsBeforeUpdate.colStats("key"), - expectedColStat = ColumnStat(InternalRow(0L, 1, 1, 1L)), - rsd = spark.sessionState.conf.ndvMaxError) - - assert(statsAfterUpdate.sizeInBytes > statsBeforeUpdate.sizeInBytes) - assert(statsAfterUpdate.rowCount == Some(2)) - StatisticsTest.checkColStat( - dataType = IntegerType, - colStat = statsAfterUpdate.colStats("key"), - expectedColStat = ColumnStat(InternalRow(0L, 2, 1, 2L)), - rsd = spark.sessionState.conf.ndvMaxError) - } - - private lazy val (testDataFrame, expectedColStatsSeq) = { - import testImplicits._ - - val intSeq = Seq(1, 2) - val stringSeq = Seq("a", "bb") - val binarySeq = Seq("a", "bb").map(_.getBytes) - val booleanSeq = Seq(true, false) - val data = intSeq.indices.map { i => - (intSeq(i), stringSeq(i), binarySeq(i), booleanSeq(i)) - } - val df: DataFrame = data.toDF("c1", "c2", "c3", "c4") - val expectedColStatsSeq: Seq[(StructField, ColumnStat)] = df.schema.map { f => - val colStat = f.dataType match { - case IntegerType => - ColumnStat(InternalRow(0L, intSeq.max, intSeq.min, intSeq.distinct.length.toLong)) - case StringType => - ColumnStat(InternalRow(0L, stringSeq.map(_.length).sum / stringSeq.length.toDouble, - stringSeq.map(_.length).max.toInt, stringSeq.distinct.length.toLong)) - case BinaryType => - ColumnStat(InternalRow(0L, binarySeq.map(_.length).sum / binarySeq.length.toDouble, - binarySeq.map(_.length).max.toInt)) - case BooleanType => - ColumnStat(InternalRow(0L, booleanSeq.count(_.equals(true)).toLong, - booleanSeq.count(_.equals(false)).toLong)) - } - (f, colStat) - } - (df, expectedColStatsSeq) - } - - private def checkColStats( - tableName: String, - isDataSourceTable: Boolean, - expectedColStatsSeq: Seq[(StructField, ColumnStat)]): Unit = { - val readback = spark.table(tableName) - val stats = readback.queryExecution.analyzed.collect { - case rel: MetastoreRelation => - assert(!isDataSourceTable, "Expected a Hive serde table, but got a data source table") - rel.catalogTable.stats.get - case rel: LogicalRelation => - assert(isDataSourceTable, "Expected a data source table, but got a Hive serde table") - rel.catalogTable.get.stats.get - } - assert(stats.length == 1) - val columnStats = stats.head.colStats - assert(columnStats.size == expectedColStatsSeq.length) - expectedColStatsSeq.foreach { case (field, expectedColStat) => - StatisticsTest.checkColStat( - dataType = field.dataType, - colStat = columnStats(field.name), - expectedColStat = expectedColStat, - rsd = spark.sessionState.conf.ndvMaxError) - } - } - - test("generate and load 
column-level stats for data source table") { - val dsTable = "dsTable" - withTable(dsTable) { - testDataFrame.write.format("parquet").saveAsTable(dsTable) - sql(s"ANALYZE TABLE $dsTable COMPUTE STATISTICS FOR COLUMNS c1, c2, c3, c4") - checkColStats(dsTable, isDataSourceTable = true, expectedColStatsSeq) - } - } - - test("generate and load column-level stats for hive serde table") { - val hTable = "hTable" - val tmp = "tmp" - withTable(hTable, tmp) { - testDataFrame.write.format("parquet").saveAsTable(tmp) - sql(s"CREATE TABLE $hTable (c1 int, c2 string, c3 binary, c4 boolean) STORED AS TEXTFILE") - sql(s"INSERT INTO $hTable SELECT * FROM $tmp") - sql(s"ANALYZE TABLE $hTable COMPUTE STATISTICS FOR COLUMNS c1, c2, c3, c4") - checkColStats(hTable, isDataSourceTable = false, expectedColStatsSeq) - } - } - - // When caseSensitive is on, for columns with only case difference, they are different columns - // and we should generate column stats for all of them. - private def checkCaseSensitiveColStats(columnName: String): Unit = { - val tableName = "tbl" - withTable(tableName) { - val column1 = columnName.toLowerCase - val column2 = columnName.toUpperCase - withSQLConf("spark.sql.caseSensitive" -> "true") { - sql(s"CREATE TABLE $tableName (`$column1` int, `$column2` double) USING PARQUET") - sql(s"INSERT INTO $tableName SELECT 1, 3.0") - sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS `$column1`, `$column2`") - val readback = spark.table(tableName) - val relations = readback.queryExecution.analyzed.collect { case rel: LogicalRelation => - val columnStats = rel.catalogTable.get.stats.get.colStats - assert(columnStats.size == 2) - StatisticsTest.checkColStat( - dataType = IntegerType, - colStat = columnStats(column1), - expectedColStat = ColumnStat(InternalRow(0L, 1, 1, 1L)), - rsd = spark.sessionState.conf.ndvMaxError) - StatisticsTest.checkColStat( - dataType = DoubleType, - colStat = columnStats(column2), - expectedColStat = ColumnStat(InternalRow(0L, 3.0d, 3.0d, 1L)), - rsd = spark.sessionState.conf.ndvMaxError) - rel - } - assert(relations.size == 1) - } - } - } - - test("check column statistics for case sensitive column names") { - checkCaseSensitiveColStats(columnName = "c1") - } - - test("check column statistics for case sensitive non-ascii column names") { - // scalastyle:off - // non ascii characters are not allowed in the source code, so we disable the scalastyle. - checkCaseSensitiveColStats(columnName = "列c") - // scalastyle:on - } - test("estimates the size of a test MetastoreRelation") { val df = sql("""SELECT * FROM src""") val sizes = df.queryExecution.analyzed.collect { case mr: MetastoreRelation => From 835f03f344f2dea2134409d09e06b34feaae09f9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 23 Nov 2016 12:54:18 -0500 Subject: [PATCH 0187/1204] [SPARK-18050][SQL] do not create default database if it already exists ## What changes were proposed in this pull request? When we try to create the default database, we ask hive to do nothing if it already exists. However, Hive will log an error message instead of doing nothing, and the error message is quite annoying and confusing. In this PR, we only create default database if it doesn't exist. ## How was this patch tested? N/A Author: Wenchen Fan Closes #15993 from cloud-fan/default-db. 
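As an illustration of the behavior change described above, the following sketch condenses the guard this patch adds to `SharedState` (the identifiers are taken from the diff below, and the surrounding `SharedState` context is assumed rather than shown): the default database is created only when the external catalog does not already report it, while `ignoreIfExists = true` is kept so that a concurrent creation by another application is still tolerated.

```scala
// Condensed sketch of the SPARK-18050 guard; see the SharedState diff below for the real change.
val defaultDbDefinition = CatalogDatabase(
  SessionCatalog.DEFAULT_DATABASE, "default database", warehousePath, Map())
// Only ask the metastore to create the default database when it is missing,
// so Hive no longer logs a spurious "database already exists" error.
if (!externalCatalog.databaseExists(SessionCatalog.DEFAULT_DATABASE)) {
  // Another Spark application may create the default database concurrently,
  // so keep ignoreIfExists = true to avoid a DatabaseAlreadyExists exception.
  externalCatalog.createDatabase(defaultDbDefinition, ignoreIfExists = true)
}
```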
(cherry picked from commit f129ebcd302168b628f47705f4a7d6b7e7b057b0) Signed-off-by: Andrew Or --- .../scala/org/apache/spark/sql/internal/SharedState.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 6232c18b1cea8..8de95fe64e663 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -92,8 +92,12 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging { { val defaultDbDefinition = CatalogDatabase( SessionCatalog.DEFAULT_DATABASE, "default database", warehousePath, Map()) - // Initialize default database if it doesn't already exist - externalCatalog.createDatabase(defaultDbDefinition, ignoreIfExists = true) + // Initialize default database if it doesn't exist + if (!externalCatalog.databaseExists(SessionCatalog.DEFAULT_DATABASE)) { + // There may be another Spark application creating default database at the same time, here we + // set `ignoreIfExists = true` to avoid `DatabaseAlreadyExists` exception. + externalCatalog.createDatabase(defaultDbDefinition, ignoreIfExists = true) + } } /** From 15d2cf26427084c0398f8d9303c218f360c52bb7 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Wed, 23 Nov 2016 11:48:59 -0800 Subject: [PATCH 0188/1204] [SPARK-18510] Fix data corruption from inferred partition column dataTypes ## What changes were proposed in this pull request? ### The Issue If I specify my schema when doing ```scala spark.read .schema(someSchemaWherePartitionColumnsAreStrings) ``` but if the partition inference can infer it as IntegerType or I assume LongType or DoubleType (basically fixed size types), then once UnsafeRows are generated, your data will be corrupted. ### Proposed solution The partition handling code path is kind of a mess. In my fix I'm probably adding to the mess, but at least trying to standardize the code path. The real issue is that a user that uses the `spark.read` code path can never clearly specify what the partition columns are. If you try to specify the fields in `schema`, we practically ignore what the user provides, and fall back to our inferred data types. What happens in the end is data corruption. My solution tries to fix this by always trying to infer partition columns the first time you specify the table. Once we find what the partition columns are, we try to find them in the user specified schema and use the dataType provided there, or fall back to the smallest common data type. We will ALWAYS append partition columns to the user's schema, even if they didn't ask for it. We will only use the data type they provided if they specified it. While this is confusing, this has been the behavior since Spark 1.6, and I didn't want to change this behavior in the QA period of Spark 2.1. We may revisit this decision later. A side effect of this PR is that we won't need https://github.com/apache/spark/pull/15942 if this PR goes in. ## How was this patch tested? Regression tests Author: Burak Yavuz Closes #15951 from brkyvz/partition-corruption. 
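To make the fix concrete, here is a small sketch that mirrors the regression test this patch adds in `DataFrameReaderWriterSuite` (the Parquet path is a hypothetical placeholder, and an active `spark` session is assumed): a directory partitioned by `part` and `id` is read back with a user-specified schema that declares the partition column `id` as a string. After this change the user-provided type is honored, the undeclared partition column `part` falls back to its inferred `IntegerType`, and partition columns are appended at the end of the schema instead of silently corrupting the data.

```scala
import org.apache.spark.sql.types._

// User-specified schema: `id` is a partition column declared as String,
// `ex` is a regular data column; `part` is intentionally left out.
val schema = new StructType()
  .add("id", StringType)
  .add("ex", ArrayType(StringType))

val df = spark.read
  .schema(schema)
  .parquet("/path/to/table")  // hypothetical path, partitioned by part/id

// Partition columns go to the end; declared ones keep the user-provided type,
// undeclared ones (`part`) use the inferred IntegerType.
assert(df.schema.toList == List(
  StructField("ex", ArrayType(StringType)),
  StructField("part", IntegerType),
  StructField("id", StringType)))
```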
(cherry picked from commit 0d1bf2b6c8ac4d4141d7cef0552c22e586843c57) Signed-off-by: Tathagata Das --- R/pkg/inst/tests/testthat/test_sparkSQL.R | 2 +- .../execution/datasources/DataSource.scala | 159 ++++++++++++------ .../sql/execution/command/DDLSuite.scala | 2 +- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- .../test/DataStreamReaderWriterSuite.scala | 45 ++++- .../sql/test/DataFrameReaderWriterSuite.scala | 38 ++++- 6 files changed, 190 insertions(+), 58 deletions(-) diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index ee48baa59c7af..c669c2e2e26ef 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -2684,7 +2684,7 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume # It makes sure that we can omit path argument in read.df API and then it calls # DataFrameWriter.load() without path. expect_error(read.df(source = "json"), - paste("Error in loadDF : analysis error - Unable to infer schema for JSON at .", + paste("Error in loadDF : analysis error - Unable to infer schema for JSON.", "It must be specified manually")) expect_error(read.df("arbitrary_path"), "Error in loadDF : analysis error - Path does not exist") expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 84fde0bbf9268..dbc3e712332f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -61,8 +61,12 @@ import org.apache.spark.util.Utils * qualified. This option only works when reading from a [[FileFormat]]. * @param userSpecifiedSchema An optional specification of the schema of the data. When present * we skip attempting to infer the schema. - * @param partitionColumns A list of column names that the relation is partitioned by. When this - * list is empty, the relation is unpartitioned. + * @param partitionColumns A list of column names that the relation is partitioned by. This list is + * generally empty during the read path, unless this DataSource is managed + * by Hive. In these cases, during `resolveRelation`, we will call + * `getOrInferFileFormatSchema` for file based DataSources to infer the + * partitioning. In other cases, if this list is empty, then this table + * is unpartitioned. * @param bucketSpec An optional specification for bucketing (hash-partitioning) of the data. * @param catalogTable Optional catalog table reference that can be used to push down operations * over the datasource to the catalog service. @@ -84,30 +88,106 @@ case class DataSource( private val caseInsensitiveOptions = new CaseInsensitiveMap(options) /** - * Infer the schema of the given FileFormat, returns a pair of schema and partition column names. + * Get the schema of the given FileFormat, if provided by `userSpecifiedSchema`, or try to infer + * it. In the read path, only managed tables by Hive provide the partition columns properly when + * initializing this class. All other file based data sources will try to infer the partitioning, + * and then cast the inferred types to user specified dataTypes if the partition columns exist + * inside `userSpecifiedSchema`, otherwise we can hit data corruption bugs like SPARK-18510. 
+ * This method will try to skip file scanning whether `userSpecifiedSchema` and + * `partitionColumns` are provided. Here are some code paths that use this method: + * 1. `spark.read` (no schema): Most amount of work. Infer both schema and partitioning columns + * 2. `spark.read.schema(userSpecifiedSchema)`: Parse partitioning columns, cast them to the + * dataTypes provided in `userSpecifiedSchema` if they exist or fallback to inferred + * dataType if they don't. + * 3. `spark.readStream.schema(userSpecifiedSchema)`: For streaming use cases, users have to + * provide the schema. Here, we also perform partition inference like 2, and try to use + * dataTypes in `userSpecifiedSchema`. All subsequent triggers for this stream will re-use + * this information, therefore calls to this method should be very cheap, i.e. there won't + * be any further inference in any triggers. + * 4. `df.saveAsTable(tableThatExisted)`: In this case, we call this method to resolve the + * existing table's partitioning scheme. This is achieved by not providing + * `userSpecifiedSchema`. For this case, we add the boolean `justPartitioning` for an early + * exit, if we don't care about the schema of the original table. + * + * @param format the file format object for this DataSource + * @param justPartitioning Whether to exit early and provide just the schema partitioning. + * @return A pair of the data schema (excluding partition columns) and the schema of the partition + * columns. If `justPartitioning` is `true`, then the dataSchema will be provided as + * `null`. */ - private def inferFileFormatSchema(format: FileFormat): (StructType, Seq[String]) = { - userSpecifiedSchema.map(_ -> partitionColumns).orElse { - val allPaths = caseInsensitiveOptions.get("path") + private def getOrInferFileFormatSchema( + format: FileFormat, + justPartitioning: Boolean = false): (StructType, StructType) = { + // the operations below are expensive therefore try not to do them if we don't need to + lazy val tempFileCatalog = { + val allPaths = caseInsensitiveOptions.get("path") ++ paths + val hadoopConf = sparkSession.sessionState.newHadoopConf() val globbedPaths = allPaths.toSeq.flatMap { path => val hdfsPath = new Path(path) - val fs = hdfsPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) + val fs = hdfsPath.getFileSystem(hadoopConf) val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) SparkHadoopUtil.get.globPathIfNecessary(qualified) }.toArray - val fileCatalog = new InMemoryFileIndex(sparkSession, globbedPaths, options, None) - val partitionSchema = fileCatalog.partitionSpec().partitionColumns - val inferred = format.inferSchema( + new InMemoryFileIndex(sparkSession, globbedPaths, options, None) + } + val partitionSchema = if (partitionColumns.isEmpty && catalogTable.isEmpty) { + // Try to infer partitioning, because no DataSource in the read path provides the partitioning + // columns properly unless it is a Hive DataSource + val resolved = tempFileCatalog.partitionSchema.map { partitionField => + val equality = sparkSession.sessionState.conf.resolver + // SPARK-18510: try to get schema from userSpecifiedSchema, otherwise fallback to inferred + userSpecifiedSchema.flatMap(_.find(f => equality(f.name, partitionField.name))).getOrElse( + partitionField) + } + StructType(resolved) + } else { + // in streaming mode, we have already inferred and registered partition columns, we will + // never have to materialize the lazy val below + lazy val inferredPartitions = tempFileCatalog.partitionSchema + // 
maintain old behavior before SPARK-18510. If userSpecifiedSchema is empty used inferred + // partitioning + if (userSpecifiedSchema.isEmpty) { + inferredPartitions + } else { + val partitionFields = partitionColumns.map { partitionColumn => + userSpecifiedSchema.flatMap(_.find(_.name == partitionColumn)).orElse { + val inferredOpt = inferredPartitions.find(_.name == partitionColumn) + if (inferredOpt.isDefined) { + logDebug( + s"""Type of partition column: $partitionColumn not found in specified schema + |for $format. + |User Specified Schema + |===================== + |${userSpecifiedSchema.orNull} + | + |Falling back to inferred dataType if it exists. + """.stripMargin) + } + inferredPartitions.find(_.name == partitionColumn) + }.getOrElse { + throw new AnalysisException(s"Failed to resolve the schema for $format for " + + s"the partition column: $partitionColumn. It must be specified manually.") + } + } + StructType(partitionFields) + } + } + if (justPartitioning) { + return (null, partitionSchema) + } + val dataSchema = userSpecifiedSchema.map { schema => + val equality = sparkSession.sessionState.conf.resolver + StructType(schema.filterNot(f => partitionSchema.exists(p => equality(p.name, f.name)))) + }.orElse { + format.inferSchema( sparkSession, caseInsensitiveOptions, - fileCatalog.allFiles()) - - inferred.map { inferredSchema => - StructType(inferredSchema ++ partitionSchema) -> partitionSchema.map(_.name) - } + tempFileCatalog.allFiles()) }.getOrElse { - throw new AnalysisException("Unable to infer schema. It must be specified manually.") + throw new AnalysisException( + s"Unable to infer schema for $format. It must be specified manually.") } + (dataSchema, partitionSchema) } /** Returns the name and schema of the source that can be used to continually read data. */ @@ -144,8 +224,8 @@ case class DataSource( "you may be able to create a static DataFrame on that directory with " + "'spark.read.load(directory)' and infer schema from it.") } - val (schema, partCols) = inferFileFormatSchema(format) - SourceInfo(s"FileSource[$path]", schema, partCols) + val (schema, partCols) = getOrInferFileFormatSchema(format) + SourceInfo(s"FileSource[$path]", StructType(schema ++ partCols), partCols.fieldNames) case _ => throw new UnsupportedOperationException( @@ -272,7 +352,7 @@ case class DataSource( HadoopFsRelation( fileCatalog, - partitionSchema = fileCatalog.partitionSpec().partitionColumns, + partitionSchema = fileCatalog.partitionSchema, dataSchema = dataSchema, bucketSpec = None, format, @@ -281,9 +361,10 @@ case class DataSource( // This is a non-streaming file based datasource. 
case (format: FileFormat, _) => val allPaths = caseInsensitiveOptions.get("path") ++ paths + val hadoopConf = sparkSession.sessionState.newHadoopConf() val globbedPaths = allPaths.flatMap { path => val hdfsPath = new Path(path) - val fs = hdfsPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) + val fs = hdfsPath.getFileSystem(hadoopConf) val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) val globPath = SparkHadoopUtil.get.globPathIfNecessary(qualified) @@ -291,23 +372,14 @@ case class DataSource( throw new AnalysisException(s"Path does not exist: $qualified") } // Sufficient to check head of the globPath seq for non-glob scenario + // Don't need to check once again if files exist in streaming mode if (checkFilesExist && !fs.exists(globPath.head)) { throw new AnalysisException(s"Path does not exist: ${globPath.head}") } globPath }.toArray - // If they gave a schema, then we try and figure out the types of the partition columns - // from that schema. - val partitionSchema = userSpecifiedSchema.map { schema => - StructType( - partitionColumns.map { c => - // TODO: Case sensitivity. - schema - .find(_.name.toLowerCase() == c.toLowerCase()) - .getOrElse(throw new AnalysisException(s"Invalid partition column '$c'")) - }) - } + val (dataSchema, inferredPartitionSchema) = getOrInferFileFormatSchema(format) val fileCatalog = if (sparkSession.sqlContext.conf.manageFilesourcePartitions && catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog) { @@ -316,27 +388,12 @@ case class DataSource( catalogTable.get, catalogTable.get.stats.map(_.sizeInBytes.toLong).getOrElse(0L)) } else { - new InMemoryFileIndex( - sparkSession, globbedPaths, options, partitionSchema) - } - - val dataSchema = userSpecifiedSchema.map { schema => - val equality = sparkSession.sessionState.conf.resolver - StructType(schema.filterNot(f => partitionColumns.exists(equality(_, f.name)))) - }.orElse { - format.inferSchema( - sparkSession, - caseInsensitiveOptions, - fileCatalog.asInstanceOf[InMemoryFileIndex].allFiles()) - }.getOrElse { - throw new AnalysisException( - s"Unable to infer schema for $format at ${allPaths.take(2).mkString(",")}. " + - "It must be specified manually") + new InMemoryFileIndex(sparkSession, globbedPaths, options, Some(inferredPartitionSchema)) } HadoopFsRelation( fileCatalog, - partitionSchema = fileCatalog.partitionSchema, + partitionSchema = inferredPartitionSchema, dataSchema = dataSchema.asNullable, bucketSpec = bucketSpec, format, @@ -384,11 +441,7 @@ case class DataSource( // up. If we fail to load the table for whatever reason, ignore the check. if (mode == SaveMode.Append) { val existingPartitionColumns = Try { - resolveRelation() - .asInstanceOf[HadoopFsRelation] - .partitionSchema - .fieldNames - .toSeq + getOrInferFileFormatSchema(format, justPartitioning = true)._2.fieldNames.toList }.getOrElse(Seq.empty[String]) // TODO: Case sensitivity. 
val sameColumns = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 02d9d15684904..10843e9ba5753 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -274,7 +274,7 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { pathToPartitionedTable, userSpecifiedSchema = Option("num int, str string"), userSpecifiedPartitionCols = partitionCols, - expectedSchema = new StructType().add("num", IntegerType).add("str", StringType), + expectedSchema = new StructType().add("str", StringType).add("num", IntegerType), expectedPartitionCols = partitionCols.map(Seq(_)).getOrElse(Seq.empty[String])) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index a099153d2e58e..bad6642ea4058 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -282,7 +282,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { createFileStreamSourceAndGetSchema( format = Some("json"), path = Some(src.getCanonicalPath), schema = None) } - assert("Unable to infer schema. It must be specified manually.;" === e.getMessage) + assert("Unable to infer schema for JSON. It must be specified manually.;" === e.getMessage) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 5630464f40803..0eb95a02432fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider} import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime, StreamingQuery, StreamTest} -import org.apache.spark.sql.types.{IntegerType, StructField, StructType} +import org.apache.spark.sql.types._ import org.apache.spark.util.Utils object LastOptions { @@ -532,4 +532,47 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { assert(e.getMessage.contains("does not support recovering")) assert(e.getMessage.contains("checkpoint location")) } + + test("SPARK-18510: use user specified types for partition columns in file sources") { + import org.apache.spark.sql.functions.udf + import testImplicits._ + withTempDir { src => + val createArray = udf { (length: Long) => + for (i <- 1 to length.toInt) yield i.toString + } + spark.range(4).select(createArray('id + 1) as 'ex, 'id, 'id % 4 as 'part).coalesce(1).write + .partitionBy("part", "id") + .mode("overwrite") + .parquet(src.toString) + // Specify a random ordering of the schema, partition column in the middle, etc. + // Also let's say that the partition columns are Strings instead of Longs. 
+ // partition columns should go to the end + val schema = new StructType() + .add("id", StringType) + .add("ex", ArrayType(StringType)) + + val sdf = spark.readStream + .schema(schema) + .format("parquet") + .load(src.toString) + + assert(sdf.schema.toList === List( + StructField("ex", ArrayType(StringType)), + StructField("part", IntegerType), // inferred partitionColumn dataType + StructField("id", StringType))) // used user provided partitionColumn dataType + + val sq = sdf.writeStream + .queryName("corruption_test") + .format("memory") + .start() + sq.processAllAvailable() + checkAnswer( + spark.table("corruption_test"), + // notice how `part` is ordered before `id` + Row(Array("1"), 0, "0") :: Row(Array("1", "2"), 1, "1") :: + Row(Array("1", "2", "3"), 2, "2") :: Row(Array("1", "2", "3", "4"), 3, "3") :: Nil + ) + sq.stop() + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index a7fda01098560..e0887e0f1c7de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} +import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -573,4 +573,40 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be } } } + + test("SPARK-18510: use user specified types for partition columns in file sources") { + import org.apache.spark.sql.functions.udf + import testImplicits._ + withTempDir { src => + val createArray = udf { (length: Long) => + for (i <- 1 to length.toInt) yield i.toString + } + spark.range(4).select(createArray('id + 1) as 'ex, 'id, 'id % 4 as 'part).coalesce(1).write + .partitionBy("part", "id") + .mode("overwrite") + .parquet(src.toString) + // Specify a random ordering of the schema, partition column in the middle, etc. + // Also let's say that the partition columns are Strings instead of Longs. + // partition columns should go to the end + val schema = new StructType() + .add("id", StringType) + .add("ex", ArrayType(StringType)) + val df = spark.read + .schema(schema) + .format("parquet") + .load(src.toString) + + assert(df.schema.toList === List( + StructField("ex", ArrayType(StringType)), + StructField("part", IntegerType), // inferred partitionColumn dataType + StructField("id", StringType))) // used user provided partitionColumn dataType + + checkAnswer( + df, + // notice how `part` is ordered before `id` + Row(Array("1"), 0, "0") :: Row(Array("1", "2"), 1, "1") :: + Row(Array("1", "2", "3"), 2, "2") :: Row(Array("1", "2", "3", "4"), 3, "3") :: Nil + ) + } + } } From 27d81d0007f4358480148fa6f3f6b079a5431a81 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Wed, 23 Nov 2016 16:15:35 -0800 Subject: [PATCH 0189/1204] [SPARK-18510][SQL] Follow up to address comments in #15951 ## What changes were proposed in this pull request? This PR addressed the rest comments in #15951. ## How was this patch tested? Jenkins Author: Shixiong Zhu Closes #15997 from zsxwing/SPARK-18510-follow-up. 
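For context, a minimal batch-side sketch of the SPARK-18510 behaviour that the suites above exercise. The `spark` session, the path and the column names are illustrative only and not part of this patch:

```scala
import org.apache.spark.sql.types._

// Data previously written with .partitionBy("part", "id"), so "part" and "id"
// live in the directory layout rather than in the Parquet files themselves.
// The user-specified schema types "id" as a String and omits "part" entirely.
val userSchema = new StructType()
  .add("id", StringType)
  .add("ex", ArrayType(StringType))

val df = spark.read
  .schema(userSchema)
  .parquet("/tmp/partitioned-data")   // illustrative path

// Partition columns are appended after the data columns:
//   - "id" keeps the user-specified StringType,
//   - "part" falls back to the inferred type (IntegerType in the tests above).
df.printSchema()
```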
(cherry picked from commit 223fa218e1f637f0d62332785a3bee225b65b990) Signed-off-by: Tathagata Das --- .../execution/datasources/DataSource.scala | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index dbc3e712332f7..ccfc759c8fa7e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -118,8 +118,10 @@ case class DataSource( private def getOrInferFileFormatSchema( format: FileFormat, justPartitioning: Boolean = false): (StructType, StructType) = { - // the operations below are expensive therefore try not to do them if we don't need to - lazy val tempFileCatalog = { + // the operations below are expensive therefore try not to do them if we don't need to, e.g., + // in streaming mode, we have already inferred and registered partition columns, we will + // never have to materialize the lazy val below + lazy val tempFileIndex = { val allPaths = caseInsensitiveOptions.get("path") ++ paths val hadoopConf = sparkSession.sessionState.newHadoopConf() val globbedPaths = allPaths.toSeq.flatMap { path => @@ -133,7 +135,7 @@ case class DataSource( val partitionSchema = if (partitionColumns.isEmpty && catalogTable.isEmpty) { // Try to infer partitioning, because no DataSource in the read path provides the partitioning // columns properly unless it is a Hive DataSource - val resolved = tempFileCatalog.partitionSchema.map { partitionField => + val resolved = tempFileIndex.partitionSchema.map { partitionField => val equality = sparkSession.sessionState.conf.resolver // SPARK-18510: try to get schema from userSpecifiedSchema, otherwise fallback to inferred userSpecifiedSchema.flatMap(_.find(f => equality(f.name, partitionField.name))).getOrElse( @@ -141,17 +143,17 @@ case class DataSource( } StructType(resolved) } else { - // in streaming mode, we have already inferred and registered partition columns, we will - // never have to materialize the lazy val below - lazy val inferredPartitions = tempFileCatalog.partitionSchema // maintain old behavior before SPARK-18510. If userSpecifiedSchema is empty used inferred // partitioning if (userSpecifiedSchema.isEmpty) { + val inferredPartitions = tempFileIndex.partitionSchema inferredPartitions } else { val partitionFields = partitionColumns.map { partitionColumn => - userSpecifiedSchema.flatMap(_.find(_.name == partitionColumn)).orElse { - val inferredOpt = inferredPartitions.find(_.name == partitionColumn) + val equality = sparkSession.sessionState.conf.resolver + userSpecifiedSchema.flatMap(_.find(c => equality(c.name, partitionColumn))).orElse { + val inferredPartitions = tempFileIndex.partitionSchema + val inferredOpt = inferredPartitions.find(p => equality(p.name, partitionColumn)) if (inferredOpt.isDefined) { logDebug( s"""Type of partition column: $partitionColumn not found in specified schema @@ -163,7 +165,7 @@ case class DataSource( |Falling back to inferred dataType if it exists. """.stripMargin) } - inferredPartitions.find(_.name == partitionColumn) + inferredOpt }.getOrElse { throw new AnalysisException(s"Failed to resolve the schema for $format for " + s"the partition column: $partitionColumn. 
It must be specified manually.") @@ -182,7 +184,7 @@ case class DataSource( format.inferSchema( sparkSession, caseInsensitiveOptions, - tempFileCatalog.allFiles()) + tempFileIndex.allFiles()) }.getOrElse { throw new AnalysisException( s"Unable to infer schema for $format. It must be specified manually.") @@ -224,8 +226,11 @@ case class DataSource( "you may be able to create a static DataFrame on that directory with " + "'spark.read.load(directory)' and infer schema from it.") } - val (schema, partCols) = getOrInferFileFormatSchema(format) - SourceInfo(s"FileSource[$path]", StructType(schema ++ partCols), partCols.fieldNames) + val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format) + SourceInfo( + s"FileSource[$path]", + StructType(dataSchema ++ partitionSchema), + partitionSchema.fieldNames) case _ => throw new UnsupportedOperationException( @@ -379,7 +384,7 @@ case class DataSource( globPath }.toArray - val (dataSchema, inferredPartitionSchema) = getOrInferFileFormatSchema(format) + val (dataSchema, partitionSchema) = getOrInferFileFormatSchema(format) val fileCatalog = if (sparkSession.sqlContext.conf.manageFilesourcePartitions && catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog) { @@ -388,12 +393,12 @@ case class DataSource( catalogTable.get, catalogTable.get.stats.map(_.sizeInBytes.toLong).getOrElse(0L)) } else { - new InMemoryFileIndex(sparkSession, globbedPaths, options, Some(inferredPartitionSchema)) + new InMemoryFileIndex(sparkSession, globbedPaths, options, Some(partitionSchema)) } HadoopFsRelation( fileCatalog, - partitionSchema = inferredPartitionSchema, + partitionSchema = partitionSchema, dataSchema = dataSchema.asNullable, bucketSpec = bucketSpec, format, From 04ec74f1274a164b2f72b31e2c147e042bf41bd9 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Thu, 24 Nov 2016 05:46:05 -0800 Subject: [PATCH 0190/1204] [SPARK-18520][ML] Add missing setXXXCol methods for BisectingKMeansModel and GaussianMixtureModel ## What changes were proposed in this pull request? add `setFeaturesCol` and `setPredictionCol` for BiKModel and GMModel add `setProbabilityCol` for GMModel ## How was this patch tested? existing tests Author: Zheng RuiFeng Closes #15957 from zhengruifeng/bikm_set. 
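To make the new API concrete, a minimal usage sketch of the model-level setters added here; the `dataset` DataFrame and the column names are assumptions for illustration, not part of the patch:

```scala
import org.apache.spark.ml.clustering.GaussianMixture

// `dataset` is assumed to be a DataFrame with a Vector column named "scaledFeatures".
val model = new GaussianMixture()
  .setK(3)
  .setFeaturesCol("scaledFeatures")
  .fit(dataset)

// The fitted model can now be re-pointed at its input/output columns directly,
// instead of inheriting them only from the estimator.
val predictions = model
  .setFeaturesCol("scaledFeatures")
  .setPredictionCol("cluster")
  .setProbabilityCol("clusterProbability")
  .transform(dataset)
```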
(cherry picked from commit 2dfabec38c24174e7f747c27c7144f7738483ec1) Signed-off-by: Yanbo Liang --- .../apache/spark/ml/clustering/BisectingKMeans.scala | 8 ++++++++ .../apache/spark/ml/clustering/GaussianMixture.scala | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index e6ca3aedffd9d..cf11ba37abb58 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -98,6 +98,14 @@ class BisectingKMeansModel private[ml] ( copied.setSummary(trainingSummary).setParent(this.parent) } + /** @group setParam */ + @Since("2.1.0") + def setFeaturesCol(value: String): this.type = set(featuresCol, value) + + /** @group setParam */ + @Since("2.1.0") + def setPredictionCol(value: String): this.type = set(predictionCol, value) + @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 92d0b7d085f12..19998ca44b115 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -87,6 +87,18 @@ class GaussianMixtureModel private[ml] ( @Since("2.0.0") val gaussians: Array[MultivariateGaussian]) extends Model[GaussianMixtureModel] with GaussianMixtureParams with MLWritable { + /** @group setParam */ + @Since("2.1.0") + def setFeaturesCol(value: String): this.type = set(featuresCol, value) + + /** @group setParam */ + @Since("2.1.0") + def setPredictionCol(value: String): this.type = set(predictionCol, value) + + /** @group setParam */ + @Since("2.1.0") + def setProbabilityCol(value: String): this.type = set(probabilityCol, value) + @Since("2.0.0") override def copy(extra: ParamMap): GaussianMixtureModel = { val copied = copyValues(new GaussianMixtureModel(uid, weights, gaussians), extra) From a7f414561325a7140557562d45fecc5ccbc8d7ff Mon Sep 17 00:00:00 2001 From: Nattavut Sutyanyong Date: Thu, 24 Nov 2016 12:07:55 -0800 Subject: [PATCH 0191/1204] [SPARK-18578][SQL] Full outer join in correlated subquery returns incorrect results ## What changes were proposed in this pull request? - Raise Analysis exception when correlated predicates exist in the descendant operators of either operand of a Full outer join in a subquery as well as in a FOJ operator itself - Raise Analysis exception when correlated predicates exists in a Window operator (a side effect inadvertently introduced by SPARK-17348) ## How was this patch tested? Run sql/test catalyst/test and new test cases, added to SubquerySuite, showing the reported incorrect results. Author: Nattavut Sutyanyong Closes #16005 from nsyca/FOJ-incorrect.1. 
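To make the restriction concrete, this is the shape of query that now fails analysis. It mirrors the scalar-subquery case added to SubquerySuite below, with `t1`, `t2`, `t3` registered as temp views inside a test:

```scala
import org.apache.spark.sql.AnalysisException

// The subquery references t1.c1 from the outer query while also containing a
// FULL OUTER JOIN, so the analyzer now raises an AnalysisException.
val e = intercept[AnalysisException] {
  spark.sql(
    """
      |select (select max(1)
      |        from (select c1 from t2 where t1.c1 = 2 and t1.c1 = t2.c1) t2
      |             full join t3
      |             on t2.c1 = t3.c1)
      |from t1
    """.stripMargin).collect()
}
```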
(cherry picked from commit a367d5ff005884322fb8bb43a1cfa4d4bf54b31a) Signed-off-by: Herman van Hovell --- .../sql/catalyst/analysis/Analyzer.scala | 10 +++++ .../org/apache/spark/sql/SubquerySuite.scala | 45 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 2918e9d158829..2d272762b384f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1017,6 +1017,10 @@ class Analyzer( // Simplify the predicates before pulling them out. val transformed = BooleanSimplification(sub) transformUp { + // WARNING: + // Only Filter can host correlated expressions at this time + // Anyone adding a new "case" below needs to add the call to + // "failOnOuterReference" to disallow correlated expressions in it. case f @ Filter(cond, child) => // Find all predicates with an outer reference. val (correlated, local) = splitConjunctivePredicates(cond).partition(containsOuter) @@ -1057,12 +1061,18 @@ class Analyzer( a } case w : Window => + failOnOuterReference(w) failOnNonEqualCorrelatedPredicate(foundNonEqualCorrelatedPred, w) w case j @ Join(left, _, RightOuter, _) => failOnOuterReference(j) failOnOuterReferenceInSubTree(left, "a RIGHT OUTER JOIN") j + // SPARK-18578: Do not allow any correlated predicate + // in a Full (Outer) Join operator and its descendants + case j @ Join(_, _, FullOuter, _) => + failOnOuterReferenceInSubTree(j, "a FULL OUTER JOIN") + j case j @ Join(_, right, jt, _) if !jt.isInstanceOf[InnerLike] => failOnOuterReference(j) failOnOuterReferenceInSubTree(right, "a LEFT (OUTER) JOIN") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index f1dd1c620e660..73a53944964fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -744,4 +744,49 @@ class SubquerySuite extends QueryTest with SharedSQLContext { } } } + // This restriction applies to + // the permutation of { LOJ, ROJ, FOJ } x { EXISTS, IN, scalar subquery } + // where correlated predicates appears in right operand of LOJ, + // or in left operand of ROJ, or in either operand of FOJ. 
+ // The test cases below cover the representatives of the patterns + test("Correlated subqueries in outer joins") { + withTempView("t1", "t2", "t3") { + Seq(1).toDF("c1").createOrReplaceTempView("t1") + Seq(2).toDF("c1").createOrReplaceTempView("t2") + Seq(1).toDF("c1").createOrReplaceTempView("t3") + + // Left outer join (LOJ) in IN subquery context + intercept[AnalysisException] { + sql( + """ + | select t1.c1 + | from t1 + | where 1 IN (select 1 + | from t3 left outer join + | (select c1 from t2 where t1.c1 = 2) t2 + | on t2.c1 = t3.c1)""".stripMargin).collect() + } + // Right outer join (ROJ) in EXISTS subquery context + intercept[AnalysisException] { + sql( + """ + | select t1.c1 + | from t1 + | where exists (select 1 + | from (select c1 from t2 where t1.c1 = 2) t2 + | right outer join t3 + | on t2.c1 = t3.c1)""".stripMargin).collect() + } + // SPARK-18578: Full outer join (FOJ) in scalar subquery context + intercept[AnalysisException] { + sql( + """ + | select (select max(1) + | from (select c1 from t2 where t1.c1 = 2 and t1.c1=t2.c1) t2 + | full join t3 + | on t2.c1=t3.c1) + | from t1""".stripMargin).collect() + } + } + } } From 57dbc682dfafc87076dcaafd29c637cb16ace91a Mon Sep 17 00:00:00 2001 From: uncleGen Date: Fri, 25 Nov 2016 09:10:17 +0000 Subject: [PATCH 0192/1204] [SPARK-18575][WEB] Keep same style: adjust the position of driver log links ## What changes were proposed in this pull request? NOT BUG, just adjust the position of driver log link to keep the same style with other executors log link. ![image](https://cloud.githubusercontent.com/assets/7402327/20590092/f8bddbb8-b25b-11e6-9aaf-3b5b3073df10.png) ## How was this patch tested? no Author: uncleGen Closes #16001 from uncleGen/SPARK-18575. (cherry picked from commit f58a8aa20106ea36386db79a8a66f529a8da75c9) Signed-off-by: Sean Owen --- .../spark/scheduler/cluster/YarnClusterSchedulerBackend.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala index ced597bed36d9..4f3d5ebf403e0 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterSchedulerBackend.scala @@ -55,8 +55,8 @@ private[spark] class YarnClusterSchedulerBackend( val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user" logDebug(s"Base URL for logs: $baseUrl") driverLogs = Some(Map( - "stderr" -> s"$baseUrl/stderr?start=-4096", - "stdout" -> s"$baseUrl/stdout?start=-4096")) + "stdout" -> s"$baseUrl/stdout?start=-4096", + "stderr" -> s"$baseUrl/stderr?start=-4096")) } catch { case e: Exception => logInfo("Error while building AM log links, so AM" + From a49dfa93e160d63e806f35cb6b6953367916f44b Mon Sep 17 00:00:00 2001 From: "n.fraison" Date: Fri, 25 Nov 2016 09:45:51 +0000 Subject: [PATCH 0193/1204] [SPARK-18119][SPARK-CORE] Namenode safemode check is only performed on one namenode which can stuck the startup of SparkHistory server ## What changes were proposed in this pull request? Instead of using the setSafeMode method that check the first namenode used the one which permitts to check only for active NNs ## How was this patch tested? manual tests Please review https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark before opening a pull request. 
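For clarity, a side-by-side sketch of the HDFS client call involved; the wrapper method names are just for illustration:

```scala
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.hadoop.hdfs.protocol.HdfsConstants

// Old check: only the first namenode is consulted, which per the description
// above can stick the history server's startup in an HA deployment.
def isFsInSafeModeOld(dfs: DistributedFileSystem): Boolean =
  dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET)

// New check: the second argument restricts the status check to active NNs only.
def isFsInSafeModeNew(dfs: DistributedFileSystem): Boolean =
  dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, true)
```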
This commit is contributed by Criteo SA under the Apache v2 licence. Author: n.fraison Closes #15648 from ashangit/SPARK-18119. (cherry picked from commit f42db0c0c1434bfcccaa70d0db55e16c4396af04) Signed-off-by: Sean Owen --- .../org/apache/spark/deploy/history/FsHistoryProvider.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index ca38a47639422..8ef69b142cd15 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -663,9 +663,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) false } - // For testing. private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = { - dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET) + /* true to check only for Active NNs status */ + dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET, true) } /** From 69856f28361022812d2af83128d8591694bcef4b Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 25 Nov 2016 11:27:07 +0000 Subject: [PATCH 0194/1204] [SPARK-3359][BUILD][DOCS] More changes to resolve javadoc 8 errors that will help unidoc/genjavadoc compatibility ## What changes were proposed in this pull request? This PR only tries to fix things that looks pretty straightforward and were fixed in other previous PRs before. This PR roughly fixes several things as below: - Fix unrecognisable class and method links in javadoc by changing it from `[[..]]` to `` `...` `` ``` [error] .../spark/sql/core/target/java/org/apache/spark/sql/streaming/DataStreamReader.java:226: error: reference not found [error] * Loads text files and returns a {link DataFrame} whose schema starts with a string column named ``` - Fix an exception annotation and remove code backticks in `throws` annotation Currently, sbt unidoc with Java 8 complains as below: ``` [error] .../java/org/apache/spark/sql/streaming/StreamingQuery.java:72: error: unexpected text [error] * throws StreamingQueryException, if this query has terminated with an exception. ``` `throws` should specify the correct class name from `StreamingQueryException,` to `StreamingQueryException` without backticks. (see [JDK-8007644](https://bugs.openjdk.java.net/browse/JDK-8007644)). - Fix `[[http..]]` to ``. ```diff - * [[https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https Oracle - * blog page]]. + * + * Oracle blog page. ``` `[[http...]]` link markdown in scaladoc is unrecognisable in javadoc. - It seems class can't have `return` annotation. So, two cases of this were removed. ``` [error] .../java/org/apache/spark/mllib/regression/IsotonicRegression.java:27: error: invalid use of return [error] * return New instance of IsotonicRegression. ``` - Fix < to `<` and > to `>` according to HTML rules. - Fix `

    ` complaint - Exclude unrecognisable in javadoc, `constructor`, `todo` and `groupname`. ## How was this patch tested? Manually tested by `jekyll build` with Java 7 and 8 ``` java version "1.7.0_80" Java(TM) SE Runtime Environment (build 1.7.0_80-b15) Java HotSpot(TM) 64-Bit Server VM (build 24.80-b11, mixed mode) ``` ``` java version "1.8.0_45" Java(TM) SE Runtime Environment (build 1.8.0_45-b14) Java HotSpot(TM) 64-Bit Server VM (build 25.45-b02, mixed mode) ``` Note: this does not yet make sbt unidoc suceed with Java 8 yet but it reduces the number of errors with Java 8. Author: hyukjinkwon Closes #15999 from HyukjinKwon/SPARK-3359-errors. (cherry picked from commit 51b1c1551d3a7147403b9e821fcc7c8f57b4824c) Signed-off-by: Sean Owen --- .../scala/org/apache/spark/SSLOptions.scala | 4 +- .../apache/spark/api/java/JavaPairRDD.scala | 6 +- .../org/apache/spark/api/java/JavaRDD.scala | 10 +-- .../spark/api/java/JavaSparkContext.scala | 14 ++-- .../apache/spark/io/CompressionCodec.scala | 2 +- .../main/scala/org/apache/spark/rdd/RDD.scala | 18 ++--- .../spark/security/CryptoStreamUtils.scala | 4 +- .../spark/serializer/KryoSerializer.scala | 3 +- .../storage/BlockReplicationPolicy.scala | 7 +- .../scala/org/apache/spark/ui/UIUtils.scala | 4 +- .../org/apache/spark/util/AccumulatorV2.scala | 2 +- .../org/apache/spark/util/RpcUtils.scala | 2 +- .../org/apache/spark/util/StatCounter.scala | 4 +- .../org/apache/spark/util/ThreadUtils.scala | 6 +- .../scala/org/apache/spark/util/Utils.scala | 10 +-- .../spark/util/io/ChunkedByteBuffer.scala | 2 +- .../scala/org/apache/spark/graphx/Graph.scala | 4 +- .../org/apache/spark/graphx/GraphLoader.scala | 2 +- .../spark/graphx/impl/EdgeRDDImpl.scala | 2 +- .../apache/spark/graphx/lib/PageRank.scala | 4 +- .../apache/spark/graphx/lib/SVDPlusPlus.scala | 3 +- .../spark/graphx/lib/TriangleCount.scala | 2 +- .../distribution/MultivariateGaussian.scala | 3 +- .../scala/org/apache/spark/ml/Predictor.scala | 2 +- .../spark/ml/attribute/AttributeGroup.scala | 2 +- .../spark/ml/attribute/attributes.scala | 4 +- .../classification/LogisticRegression.scala | 74 +++++++++---------- .../MultilayerPerceptronClassifier.scala | 1 - .../spark/ml/classification/NaiveBayes.scala | 8 +- .../RandomForestClassifier.scala | 6 +- .../spark/ml/clustering/BisectingKMeans.scala | 14 ++-- .../ml/clustering/ClusteringSummary.scala | 2 +- .../spark/ml/clustering/GaussianMixture.scala | 6 +- .../apache/spark/ml/clustering/KMeans.scala | 8 +- .../org/apache/spark/ml/clustering/LDA.scala | 42 +++++------ .../org/apache/spark/ml/feature/DCT.scala | 3 +- .../org/apache/spark/ml/feature/MinHash.scala | 5 +- .../spark/ml/feature/MinMaxScaler.scala | 4 +- .../ml/feature/PolynomialExpansion.scala | 14 ++-- .../spark/ml/feature/RandomProjection.scala | 4 +- .../spark/ml/feature/StandardScaler.scala | 4 +- .../spark/ml/feature/StopWordsRemover.scala | 5 +- .../org/apache/spark/ml/feature/package.scala | 3 +- .../IterativelyReweightedLeastSquares.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 12 +-- .../ml/regression/AFTSurvivalRegression.scala | 27 +++---- .../ml/regression/DecisionTreeRegressor.scala | 4 +- .../spark/ml/regression/GBTRegressor.scala | 4 +- .../GeneralizedLinearRegression.scala | 12 +-- .../ml/regression/LinearRegression.scala | 38 +++++----- .../ml/regression/RandomForestRegressor.scala | 5 +- .../ml/source/libsvm/LibSVMDataSource.scala | 13 ++-- .../ml/tree/impl/GradientBoostedTrees.scala | 10 +-- .../spark/ml/tree/impl/RandomForest.scala | 2 +- 
.../org/apache/spark/ml/tree/treeParams.scala | 6 +- .../spark/ml/tuning/CrossValidator.scala | 4 +- .../org/apache/spark/ml/util/ReadWrite.scala | 10 +-- .../mllib/classification/NaiveBayes.scala | 28 +++---- .../mllib/clustering/BisectingKMeans.scala | 21 +++--- .../clustering/BisectingKMeansModel.scala | 4 +- .../mllib/clustering/GaussianMixture.scala | 6 +- .../clustering/GaussianMixtureModel.scala | 2 +- .../apache/spark/mllib/clustering/LDA.scala | 24 +++--- .../spark/mllib/clustering/LDAModel.scala | 2 +- .../spark/mllib/clustering/LDAOptimizer.scala | 2 +- .../clustering/PowerIterationClustering.scala | 13 ++-- .../mllib/clustering/StreamingKMeans.scala | 4 +- .../mllib/evaluation/RegressionMetrics.scala | 10 ++- .../org/apache/spark/mllib/fpm/FPGrowth.scala | 12 +-- .../apache/spark/mllib/fpm/PrefixSpan.scala | 7 +- .../linalg/distributed/BlockMatrix.scala | 20 ++--- .../linalg/distributed/CoordinateMatrix.scala | 4 +- .../linalg/distributed/IndexedRowMatrix.scala | 4 +- .../mllib/linalg/distributed/RowMatrix.scala | 2 +- .../spark/mllib/optimization/Gradient.scala | 24 +++--- .../mllib/optimization/GradientDescent.scala | 4 +- .../spark/mllib/optimization/LBFGS.scala | 7 +- .../spark/mllib/optimization/NNLS.scala | 2 +- .../spark/mllib/optimization/Updater.scala | 6 +- .../org/apache/spark/mllib/package.scala | 4 +- .../apache/spark/mllib/rdd/RDDFunctions.scala | 2 +- .../spark/mllib/recommendation/ALS.scala | 7 +- .../MatrixFactorizationModel.scala | 6 +- .../mllib/regression/IsotonicRegression.scala | 9 +-- .../stat/MultivariateOnlineSummarizer.scala | 7 +- .../apache/spark/mllib/stat/Statistics.scala | 11 +-- .../distribution/MultivariateGaussian.scala | 3 +- .../mllib/tree/GradientBoostedTrees.scala | 2 +- .../spark/mllib/tree/RandomForest.scala | 8 +- .../apache/spark/mllib/tree/model/Split.scala | 2 +- .../org/apache/spark/mllib/util/MLUtils.scala | 10 +-- .../spark/mllib/util/modelSaveLoad.scala | 2 +- pom.xml | 12 +++ project/SparkBuild.scala | 5 +- .../main/scala/org/apache/spark/sql/Row.scala | 2 +- .../aggregate/CentralMomentAgg.scala | 4 +- .../apache/spark/sql/types/BinaryType.scala | 2 +- .../apache/spark/sql/types/BooleanType.scala | 2 +- .../org/apache/spark/sql/types/ByteType.scala | 2 +- .../sql/types/CalendarIntervalType.scala | 2 +- .../org/apache/spark/sql/types/DateType.scala | 2 +- .../apache/spark/sql/types/DecimalType.scala | 4 +- .../apache/spark/sql/types/DoubleType.scala | 2 +- .../apache/spark/sql/types/FloatType.scala | 2 +- .../apache/spark/sql/types/IntegerType.scala | 2 +- .../org/apache/spark/sql/types/LongType.scala | 2 +- .../org/apache/spark/sql/types/MapType.scala | 2 +- .../org/apache/spark/sql/types/NullType.scala | 2 +- .../apache/spark/sql/types/ShortType.scala | 2 +- .../apache/spark/sql/types/StringType.scala | 2 +- .../spark/sql/types/TimestampType.scala | 2 +- .../apache/spark/sql/DataFrameReader.scala | 17 +++-- .../spark/sql/DataFrameStatFunctions.scala | 16 ++-- .../apache/spark/sql/DataFrameWriter.scala | 4 +- .../org/apache/spark/sql/SQLContext.scala | 62 ++++++++-------- .../sql/execution/stat/FrequentItems.scala | 3 +- .../sql/execution/stat/StatFunctions.scala | 4 +- .../spark/sql/expressions/Aggregator.scala | 8 +- .../sql/expressions/UserDefinedFunction.scala | 2 +- .../apache/spark/sql/expressions/Window.scala | 16 ++-- .../spark/sql/expressions/WindowSpec.scala | 16 ++-- .../sql/expressions/scalalang/typed.scala | 2 +- .../apache/spark/sql/expressions/udaf.scala | 24 +++--- .../apache/spark/sql/jdbc/JdbcDialects.scala | 6 
+- .../sql/streaming/DataStreamReader.scala | 20 ++--- .../sql/streaming/DataStreamWriter.scala | 8 +- .../spark/sql/streaming/StreamingQuery.scala | 10 ++- .../sql/streaming/StreamingQueryManager.scala | 8 +- .../sql/util/QueryExecutionListener.scala | 2 +- .../hive/execution/InsertIntoHiveTable.scala | 4 +- .../spark/sql/hive/orc/OrcFileFormat.scala | 4 +- .../spark/sql/hive/orc/OrcFileOperator.scala | 2 +- 132 files changed, 558 insertions(+), 499 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SSLOptions.scala b/core/src/main/scala/org/apache/spark/SSLOptions.scala index be19179b00a49..5f14102c3c366 100644 --- a/core/src/main/scala/org/apache/spark/SSLOptions.scala +++ b/core/src/main/scala/org/apache/spark/SSLOptions.scala @@ -150,8 +150,8 @@ private[spark] object SSLOptions extends Logging { * $ - `[ns].enabledAlgorithms` - a comma separated list of ciphers * * For a list of protocols and ciphers supported by particular Java versions, you may go to - * [[https://blogs.oracle.com/java-platform-group/entry/diagnosing_tls_ssl_and_https Oracle - * blog page]]. + * + * Oracle blog page. * * You can optionally specify the default configuration. If you do, for each setting which is * missing in SparkConf, the corresponding setting is used from the default configuration. diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index bff5a29bb60f1..d7e3a1b1be48c 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -405,7 +405,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * partitioning of the resulting key-value pair RDD by passing a Partitioner. * * @note If you are grouping in order to perform an aggregation (such as a sum or average) over - * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]] + * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey` * will provide much better performance. */ def groupByKey(partitioner: Partitioner): JavaPairRDD[K, JIterable[V]] = @@ -416,7 +416,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * resulting RDD with into `numPartitions` partitions. * * @note If you are grouping in order to perform an aggregation (such as a sum or average) over - * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]] + * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey` * will provide much better performance. */ def groupByKey(numPartitions: Int): JavaPairRDD[K, JIterable[V]] = @@ -546,7 +546,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * resulting RDD with the existing partitioner/parallelism level. * * @note If you are grouping in order to perform an aggregation (such as a sum or average) over - * each key, using [[JavaPairRDD.reduceByKey]] or [[JavaPairRDD.combineByKey]] + * each key, using `JavaPairRDD.reduceByKey` or `JavaPairRDD.combineByKey` * will provide much better performance. 
*/ def groupByKey(): JavaPairRDD[K, JIterable[V]] = diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index ccd94f876e0b8..a20d264be5afd 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -103,10 +103,10 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] - * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * with replacement: expected number of times each element is chosen; fraction must be >= 0 * * @note This is NOT guaranteed to provide exactly the fraction of the count - * of the given [[RDD]]. + * of the given `RDD`. */ def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] = sample(withReplacement, fraction, Utils.random.nextLong) @@ -117,11 +117,11 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] - * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * with replacement: expected number of times each element is chosen; fraction must be >= 0 * @param seed seed for the random number generator * * @note This is NOT guaranteed to provide exactly the fraction of the count - * of the given [[RDD]]. + * of the given `RDD`. */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): JavaRDD[T] = wrapRDD(rdd.sample(withReplacement, fraction, seed)) @@ -167,7 +167,7 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) * Return an RDD with the elements from `this` that are not in `other`. * * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting - * RDD will be <= us. + * RDD will be <= us. */ def subtract(other: JavaRDD[T]): JavaRDD[T] = wrapRDD(rdd.subtract(other)) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 38d347aeab8c6..9481156bc93a5 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -238,7 +238,9 @@ class JavaSparkContext(val sc: SparkContext) * }}} * * Do - * `JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + * {{{ + * JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path") + * }}} * * then `rdd` contains * {{{ @@ -270,7 +272,9 @@ class JavaSparkContext(val sc: SparkContext) * }}} * * Do - * `JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, + * {{{ + * JavaPairRDD rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path") + * }}}, * * then `rdd` contains * {{{ @@ -749,7 +753,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Get a local property set in this thread, or null if it is missing. See - * [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]]. + * `org.apache.spark.api.java.JavaSparkContext.setLocalProperty`. 
*/ def getLocalProperty(key: String): String = sc.getLocalProperty(key) @@ -769,7 +773,7 @@ class JavaSparkContext(val sc: SparkContext) * Application programmers can use this method to group all those jobs together and give a * group description. Once set, the Spark web UI will associate such jobs with this group. * - * The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] + * The application can also use `org.apache.spark.api.java.JavaSparkContext.cancelJobGroup` * to cancel all running jobs in this group. For example, * {{{ * // In the main thread: @@ -802,7 +806,7 @@ class JavaSparkContext(val sc: SparkContext) /** * Cancel active jobs for the specified group. See - * [[org.apache.spark.api.java.JavaSparkContext.setJobGroup]] for more information. + * `org.apache.spark.api.java.JavaSparkContext.setJobGroup` for more information. */ def cancelJobGroup(groupId: String): Unit = sc.cancelJobGroup(groupId) diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 6ba79e506a648..2e991ce394c42 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -172,7 +172,7 @@ private final object SnappyCompressionCodec { } /** - * Wrapper over [[SnappyOutputStream]] which guards against write-after-close and double-close + * Wrapper over `SnappyOutputStream` which guards against write-after-close and double-close * issues. See SPARK-7660 for more details. This wrapping can be removed if we upgrade to a version * of snappy-java that contains the fix for https://github.com/xerial/snappy-java/issues/107. */ diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index bff2b8f1d06c9..8e673447581cf 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -70,8 +70,8 @@ import org.apache.spark.util.random.{BernoulliCellSampler, BernoulliSampler, Poi * All of the scheduling and execution in Spark is done based on these methods, allowing each RDD * to implement its own way of computing itself. Indeed, users can implement custom RDDs (e.g. for * reading data from a new storage system) by overriding these functions. Please refer to the - * [[http://people.csail.mit.edu/matei/papers/2012/nsdi_spark.pdf Spark paper]] for more details - * on RDD internals. + * Spark paper + * for more details on RDD internals. */ abstract class RDD[T: ClassTag]( @transient private var _sc: SparkContext, @@ -469,7 +469,7 @@ abstract class RDD[T: ClassTag]( * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] - * with replacement: expected number of times each element is chosen; fraction must be >= 0 + * with replacement: expected number of times each element is chosen; fraction must be >= 0 * @param seed seed for the random number generator * * @note This is NOT guaranteed to provide exactly the fraction of the count @@ -675,8 +675,8 @@ abstract class RDD[T: ClassTag]( * may even differ each time the resulting RDD is evaluated. * * @note This operation may be very expensive. 
If you are grouping in order to perform an - * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] - * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. + * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey` + * or `PairRDDFunctions.reduceByKey` will provide much better performance. */ def groupBy[K](f: T => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = withScope { groupBy[K](f, defaultPartitioner(this)) @@ -688,8 +688,8 @@ abstract class RDD[T: ClassTag]( * may even differ each time the resulting RDD is evaluated. * * @note This operation may be very expensive. If you are grouping in order to perform an - * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] - * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. + * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey` + * or `PairRDDFunctions.reduceByKey` will provide much better performance. */ def groupBy[K]( f: T => K, @@ -703,8 +703,8 @@ abstract class RDD[T: ClassTag]( * may even differ each time the resulting RDD is evaluated. * * @note This operation may be very expensive. If you are grouping in order to perform an - * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] - * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. + * aggregation (such as a sum or average) over each key, using `PairRDDFunctions.aggregateByKey` + * or `PairRDDFunctions.reduceByKey` will provide much better performance. */ def groupBy[K](f: T => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K] = null) : RDD[(K, Iterable[T])] = withScope { diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala index 8f15f50bee814..f41fc38be2080 100644 --- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala +++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala @@ -46,7 +46,7 @@ private[spark] object CryptoStreamUtils extends Logging { val COMMONS_CRYPTO_CONF_PREFIX = "commons.crypto." /** - * Helper method to wrap [[OutputStream]] with [[CryptoOutputStream]] for encryption. + * Helper method to wrap `OutputStream` with `CryptoOutputStream` for encryption. */ def createCryptoOutputStream( os: OutputStream, @@ -62,7 +62,7 @@ private[spark] object CryptoStreamUtils extends Logging { } /** - * Helper method to wrap [[InputStream]] with [[CryptoInputStream]] for decryption. + * Helper method to wrap `InputStream` with `CryptoInputStream` for decryption. */ def createCryptoInputStream( is: InputStream, diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 19e020c968a9a..7eb2da1c2748c 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -43,7 +43,8 @@ import org.apache.spark.util.{BoundedPriorityQueue, SerializableConfiguration, S import org.apache.spark.util.collection.CompactBuffer /** - * A Spark serializer that uses the [[https://code.google.com/p/kryo/ Kryo serialization library]]. + * A Spark serializer that uses the + * Kryo serialization library. 
* * @note This serializer is not guaranteed to be wire-compatible across different versions of * Spark. It is intended to be used to serialize/de-serialize data within a single diff --git a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala index bf087af16a5b1..bb8a684b4c7a8 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala @@ -89,17 +89,18 @@ class RandomBlockReplicationPolicy prioritizedPeers } + // scalastyle:off line.size.limit /** * Uses sampling algorithm by Robert Floyd. Finds a random sample in O(n) while - * minimizing space usage - * [[http://math.stackexchange.com/questions/178690/ - * whats-the-proof-of-correctness-for-robert-floyds-algorithm-for-selecting-a-sin]] + * minimizing space usage. Please see + * here. * * @param n total number of indices * @param m number of samples needed * @param r random number generator * @return list of m random unique indices */ + // scalastyle:on line.size.limit private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = { val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) {case (set, i) => val t = r.nextInt(i) + 1 diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 57f6f2f0a9be5..dbeb970c81dfe 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -422,8 +422,8 @@ private[spark] object UIUtils extends Logging { * the whole string will rendered as a simple escaped text. * * Note: In terms of security, only anchor tags with root relative links are supported. So any - * attempts to embed links outside Spark UI, or other tags like ++ ++ From 2bc2c1539eb5c44ee75d5e4ce0a8f3c689bd8ffd Mon Sep 17 00:00:00 2001 From: DjvuLee Date: Tue, 13 Jun 2017 15:56:03 +0100 Subject: [PATCH 0990/1204] [SPARK-21064][CORE][TEST] Fix the default value bug in NettyBlockTransferServiceSuite ## What changes were proposed in this pull request? The default value for `spark.port.maxRetries` is 100, but we use 10 in the suite file. So we change it to 100 to avoid test failure. ## How was this patch tested? No test Author: DjvuLee Closes #18280 from djvulee/NettyTestBug. 
(cherry picked from commit b36ce2a2469ff923a3367a530d4a14899ecf9238) Signed-off-by: Sean Owen --- .../spark/network/netty/NettyBlockTransferServiceSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index 271ab8b148831..98259300381eb 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -80,7 +80,8 @@ class NettyBlockTransferServiceSuite private def verifyServicePort(expectedPort: Int, actualPort: Int): Unit = { actualPort should be >= expectedPort // avoid testing equality in case of simultaneous tests - actualPort should be <= (expectedPort + 10) + // the default value for `spark.port.maxRetries` is 100 under test + actualPort should be <= (expectedPort + 100) } private def createService(port: Int): NettyBlockTransferService = { From ee0e74e65cd4075fcd6da74227b46c5b1c7d42cc Mon Sep 17 00:00:00 2001 From: DjvuLee Date: Tue, 13 Jun 2017 15:56:03 +0100 Subject: [PATCH 0991/1204] [SPARK-21064][CORE][TEST] Fix the default value bug in NettyBlockTransferServiceSuite ## What changes were proposed in this pull request? The default value for `spark.port.maxRetries` is 100, but we use 10 in the suite file. So we change it to 100 to avoid test failure. ## How was this patch tested? No test Author: DjvuLee Closes #18280 from djvulee/NettyTestBug. (cherry picked from commit b36ce2a2469ff923a3367a530d4a14899ecf9238) Signed-off-by: Sean Owen --- .../spark/network/netty/NettyBlockTransferServiceSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index 121447a96529b..ebda24420cd55 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -81,7 +81,8 @@ class NettyBlockTransferServiceSuite private def verifyServicePort(expectedPort: Int, actualPort: Int): Unit = { actualPort should be >= expectedPort // avoid testing equality in case of simultaneous tests - actualPort should be <= (expectedPort + 10) + // the default value for `spark.port.maxRetries` is 100 under test + actualPort should be <= (expectedPort + 100) } private def createService(port: Int): NettyBlockTransferService = { From 220943d858a060b8b8454f2058065751726ced17 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Mon, 12 Jun 2017 14:58:08 -0700 Subject: [PATCH 0992/1204] [SPARK-20979][SS] Add RateSource to generate values for tests and benchmark ## What changes were proposed in this pull request? This PR adds RateSource for Structured Streaming so that the user can use it to generate data for tests and benchmark easily. This source generates increment long values with timestamps. Each generated row has two columns: a timestamp column for the generated time and an auto increment long column starting with 0L. It supports the following options: - `rowsPerSecond` (e.g. 100, default: 1): How many rows should be generated per second. - `rampUpTime` (e.g. 5s, default: 0s): How long to ramp up before the generating speed becomes `rowsPerSecond`. 
Using finer granularities than seconds will be truncated to integer seconds. - `numPartitions` (e.g. 10, default: Spark's default parallelism): The partition number for the generated rows. The source will try its best to reach `rowsPerSecond`, but the query may be resource constrained, and `numPartitions` can be tweaked to help reach the desired speed. Here is a simple example that prints 10 rows per seconds: ``` spark.readStream .format("rate") .option("rowsPerSecond", "10") .load() .writeStream .format("console") .start() ``` The idea came from marmbrus and he did the initial work. ## How was this patch tested? The added tests. Author: Shixiong Zhu Author: Michael Armbrust Closes #18199 from zsxwing/rate. --- ...pache.spark.sql.sources.DataSourceRegister | 1 + .../streaming/RateSourceProvider.scala | 243 ++++++++++++++++++ .../execution/streaming/RateSourceSuite.scala | 182 +++++++++++++ .../spark/sql/streaming/StreamTest.scala | 3 + 4 files changed, 429 insertions(+) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceSuite.scala diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 27d32b5dca431..0c5f3f22e31e8 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -5,3 +5,4 @@ org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat org.apache.spark.sql.execution.datasources.text.TextFileFormat org.apache.spark.sql.execution.streaming.ConsoleSinkProvider org.apache.spark.sql.execution.streaming.TextSocketSourceProvider +org.apache.spark.sql.execution.streaming.RateSourceProvider diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala new file mode 100644 index 0000000000000..e61a8eb628891 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateSourceProvider.scala @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.streaming + +import java.io._ +import java.nio.charset.StandardCharsets +import java.util.concurrent.TimeUnit + +import org.apache.commons.io.IOUtils + +import org.apache.spark.internal.Logging +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider} +import org.apache.spark.sql.types._ +import org.apache.spark.util.{ManualClock, SystemClock} + +/** + * A source that generates increment long values with timestamps. Each generated row has two + * columns: a timestamp column for the generated time and an auto increment long column starting + * with 0L. + * + * This source supports the following options: + * - `rowsPerSecond` (e.g. 100, default: 1): How many rows should be generated per second. + * - `rampUpTime` (e.g. 5s, default: 0s): How long to ramp up before the generating speed + * becomes `rowsPerSecond`. Using finer granularities than seconds will be truncated to integer + * seconds. + * - `numPartitions` (e.g. 10, default: Spark's default parallelism): The partition number for the + * generated rows. The source will try its best to reach `rowsPerSecond`, but the query may + * be resource constrained, and `numPartitions` can be tweaked to help reach the desired speed. + */ +class RateSourceProvider extends StreamSourceProvider with DataSourceRegister { + + override def sourceSchema( + sqlContext: SQLContext, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): (String, StructType) = + (shortName(), RateSourceProvider.SCHEMA) + + override def createSource( + sqlContext: SQLContext, + metadataPath: String, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): Source = { + val params = CaseInsensitiveMap(parameters) + + val rowsPerSecond = params.get("rowsPerSecond").map(_.toLong).getOrElse(1L) + if (rowsPerSecond <= 0) { + throw new IllegalArgumentException( + s"Invalid value '${params("rowsPerSecond")}'. The option 'rowsPerSecond' " + + "must be positive") + } + + val rampUpTimeSeconds = + params.get("rampUpTime").map(JavaUtils.timeStringAsSec(_)).getOrElse(0L) + if (rampUpTimeSeconds < 0) { + throw new IllegalArgumentException( + s"Invalid value '${params("rampUpTime")}'. The option 'rampUpTime' " + + "must not be negative") + } + + val numPartitions = params.get("numPartitions").map(_.toInt).getOrElse( + sqlContext.sparkContext.defaultParallelism) + if (numPartitions <= 0) { + throw new IllegalArgumentException( + s"Invalid value '${params("numPartitions")}'. 
The option 'numPartitions' " + + "must be positive") + } + + new RateStreamSource( + sqlContext, + metadataPath, + rowsPerSecond, + rampUpTimeSeconds, + numPartitions, + params.get("useManualClock").map(_.toBoolean).getOrElse(false) // Only for testing + ) + } + override def shortName(): String = "rate" +} + +object RateSourceProvider { + val SCHEMA = + StructType(StructField("timestamp", TimestampType) :: StructField("value", LongType) :: Nil) + + val VERSION = 1 +} + +class RateStreamSource( + sqlContext: SQLContext, + metadataPath: String, + rowsPerSecond: Long, + rampUpTimeSeconds: Long, + numPartitions: Int, + useManualClock: Boolean) extends Source with Logging { + + import RateSourceProvider._ + import RateStreamSource._ + + val clock = if (useManualClock) new ManualClock else new SystemClock + + private val maxSeconds = Long.MaxValue / rowsPerSecond + + if (rampUpTimeSeconds > maxSeconds) { + throw new ArithmeticException( + s"Integer overflow. Max offset with $rowsPerSecond rowsPerSecond" + + s" is $maxSeconds, but 'rampUpTimeSeconds' is $rampUpTimeSeconds.") + } + + private val startTimeMs = { + val metadataLog = + new HDFSMetadataLog[LongOffset](sqlContext.sparkSession, metadataPath) { + override def serialize(metadata: LongOffset, out: OutputStream): Unit = { + val writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8)) + writer.write("v" + VERSION + "\n") + writer.write(metadata.json) + writer.flush + } + + override def deserialize(in: InputStream): LongOffset = { + val content = IOUtils.toString(new InputStreamReader(in, StandardCharsets.UTF_8)) + // HDFSMetadataLog guarantees that it never creates a partial file. + assert(content.length != 0) + if (content(0) == 'v') { + val indexOfNewLine = content.indexOf("\n") + if (indexOfNewLine > 0) { + val version = parseVersion(content.substring(0, indexOfNewLine), VERSION) + LongOffset(SerializedOffset(content.substring(indexOfNewLine + 1))) + } else { + throw new IllegalStateException( + s"Log file was malformed: failed to detect the log file version line.") + } + } else { + throw new IllegalStateException( + s"Log file was malformed: failed to detect the log file version line.") + } + } + } + + metadataLog.get(0).getOrElse { + val offset = LongOffset(clock.getTimeMillis()) + metadataLog.add(0, offset) + logInfo(s"Start time: $offset") + offset + }.offset + } + + /** When the system time runs backward, "lastTimeMs" will make sure we are still monotonic. */ + @volatile private var lastTimeMs = startTimeMs + + override def schema: StructType = RateSourceProvider.SCHEMA + + override def getOffset: Option[Offset] = { + val now = clock.getTimeMillis() + if (lastTimeMs < now) { + lastTimeMs = now + } + Some(LongOffset(TimeUnit.MILLISECONDS.toSeconds(lastTimeMs - startTimeMs))) + } + + override def getBatch(start: Option[Offset], end: Offset): DataFrame = { + val startSeconds = start.flatMap(LongOffset.convert(_).map(_.offset)).getOrElse(0L) + val endSeconds = LongOffset.convert(end).map(_.offset).getOrElse(0L) + assert(startSeconds <= endSeconds, s"startSeconds($startSeconds) > endSeconds($endSeconds)") + if (endSeconds > maxSeconds) { + throw new ArithmeticException("Integer overflow. 
Max offset with " + + s"$rowsPerSecond rowsPerSecond is $maxSeconds, but it's $endSeconds now.") + } + // Fix "lastTimeMs" for recovery + if (lastTimeMs < TimeUnit.SECONDS.toMillis(endSeconds) + startTimeMs) { + lastTimeMs = TimeUnit.SECONDS.toMillis(endSeconds) + startTimeMs + } + val rangeStart = valueAtSecond(startSeconds, rowsPerSecond, rampUpTimeSeconds) + val rangeEnd = valueAtSecond(endSeconds, rowsPerSecond, rampUpTimeSeconds) + logDebug(s"startSeconds: $startSeconds, endSeconds: $endSeconds, " + + s"rangeStart: $rangeStart, rangeEnd: $rangeEnd") + + if (rangeStart == rangeEnd) { + return sqlContext.internalCreateDataFrame(sqlContext.sparkContext.emptyRDD, schema) + } + + val localStartTimeMs = startTimeMs + TimeUnit.SECONDS.toMillis(startSeconds) + val relativeMsPerValue = + TimeUnit.SECONDS.toMillis(endSeconds - startSeconds).toDouble / (rangeEnd - rangeStart) + + val rdd = sqlContext.sparkContext.range(rangeStart, rangeEnd, 1, numPartitions).map { v => + val relative = math.round((v - rangeStart) * relativeMsPerValue) + InternalRow(DateTimeUtils.fromMillis(relative + localStartTimeMs), v) + } + sqlContext.internalCreateDataFrame(rdd, schema) + } + + override def stop(): Unit = {} + + override def toString: String = s"RateSource[rowsPerSecond=$rowsPerSecond, " + + s"rampUpTimeSeconds=$rampUpTimeSeconds, numPartitions=$numPartitions]" +} + +object RateStreamSource { + + /** Calculate the end value we will emit at the time `seconds`. */ + def valueAtSecond(seconds: Long, rowsPerSecond: Long, rampUpTimeSeconds: Long): Long = { + // E.g., rampUpTimeSeconds = 4, rowsPerSecond = 10 + // Then speedDeltaPerSecond = 2 + // + // seconds = 0 1 2 3 4 5 6 + // speed = 0 2 4 6 8 10 10 (speedDeltaPerSecond * seconds) + // end value = 0 2 6 12 20 30 40 (0 + speedDeltaPerSecond * seconds) * (seconds + 1) / 2 + val speedDeltaPerSecond = rowsPerSecond / (rampUpTimeSeconds + 1) + if (seconds <= rampUpTimeSeconds) { + // Calculate "(0 + speedDeltaPerSecond * seconds) * (seconds + 1) / 2" in a special way to + // avoid overflow + if (seconds % 2 == 1) { + (seconds + 1) / 2 * speedDeltaPerSecond * seconds + } else { + seconds / 2 * speedDeltaPerSecond * (seconds + 1) + } + } else { + // rampUpPart is just a special case of the above formula: rampUpTimeSeconds == seconds + val rampUpPart = valueAtSecond(rampUpTimeSeconds, rowsPerSecond, rampUpTimeSeconds) + rampUpPart + (seconds - rampUpTimeSeconds) * rowsPerSecond + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceSuite.scala new file mode 100644 index 0000000000000..bdba536425a43 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/RateSourceSuite.scala @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +import java.util.concurrent.TimeUnit + +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.streaming.{StreamingQueryException, StreamTest} +import org.apache.spark.util.ManualClock + +class RateSourceSuite extends StreamTest { + + import testImplicits._ + + case class AdvanceRateManualClock(seconds: Long) extends AddData { + override def addData(query: Option[StreamExecution]): (Source, Offset) = { + assert(query.nonEmpty) + val rateSource = query.get.logicalPlan.collect { + case StreamingExecutionRelation(source, _) if source.isInstanceOf[RateStreamSource] => + source.asInstanceOf[RateStreamSource] + }.head + rateSource.clock.asInstanceOf[ManualClock].advance(TimeUnit.SECONDS.toMillis(seconds)) + (rateSource, rateSource.getOffset.get) + } + } + + test("basic") { + val input = spark.readStream + .format("rate") + .option("rowsPerSecond", "10") + .option("useManualClock", "true") + .load() + testStream(input)( + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((0 until 10).map(v => new java.sql.Timestamp(v * 100L) -> v): _*), + StopStream, + StartStream(), + // Advance 2 seconds because creating a new RateSource will also create a new ManualClock + AdvanceRateManualClock(seconds = 2), + CheckLastBatch((10 until 20).map(v => new java.sql.Timestamp(v * 100L) -> v): _*) + ) + } + + test("uniform distribution of event timestamps") { + val input = spark.readStream + .format("rate") + .option("rowsPerSecond", "1500") + .option("useManualClock", "true") + .load() + .as[(java.sql.Timestamp, Long)] + .map(v => (v._1.getTime, v._2)) + val expectedAnswer = (0 until 1500).map { v => + (math.round(v * (1000.0 / 1500)), v) + } + testStream(input)( + AdvanceRateManualClock(seconds = 1), + CheckLastBatch(expectedAnswer: _*) + ) + } + + test("valueAtSecond") { + import RateStreamSource._ + + assert(valueAtSecond(seconds = 0, rowsPerSecond = 5, rampUpTimeSeconds = 0) === 0) + assert(valueAtSecond(seconds = 1, rowsPerSecond = 5, rampUpTimeSeconds = 0) === 5) + + assert(valueAtSecond(seconds = 0, rowsPerSecond = 5, rampUpTimeSeconds = 2) === 0) + assert(valueAtSecond(seconds = 1, rowsPerSecond = 5, rampUpTimeSeconds = 2) === 1) + assert(valueAtSecond(seconds = 2, rowsPerSecond = 5, rampUpTimeSeconds = 2) === 3) + assert(valueAtSecond(seconds = 3, rowsPerSecond = 5, rampUpTimeSeconds = 2) === 8) + + assert(valueAtSecond(seconds = 0, rowsPerSecond = 10, rampUpTimeSeconds = 4) === 0) + assert(valueAtSecond(seconds = 1, rowsPerSecond = 10, rampUpTimeSeconds = 4) === 2) + assert(valueAtSecond(seconds = 2, rowsPerSecond = 10, rampUpTimeSeconds = 4) === 6) + assert(valueAtSecond(seconds = 3, rowsPerSecond = 10, rampUpTimeSeconds = 4) === 12) + assert(valueAtSecond(seconds = 4, rowsPerSecond = 10, rampUpTimeSeconds = 4) === 20) + assert(valueAtSecond(seconds = 5, rowsPerSecond = 10, rampUpTimeSeconds = 4) === 30) + } + + test("rampUpTime") { + val input = spark.readStream + .format("rate") + .option("rowsPerSecond", "10") + .option("rampUpTime", "4s") + .option("useManualClock", "true") + .load() + .as[(java.sql.Timestamp, Long)] + .map(v => (v._1.getTime, v._2)) + testStream(input)( + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((0 until 2).map(v => v * 500 -> v): _*), // speed = 2 + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((2 until 6).map(v => 1000 + (v - 2) * 250 -> v): _*), // speed = 4 + 
AdvanceRateManualClock(seconds = 1), + CheckLastBatch({ + Seq(2000 -> 6, 2167 -> 7, 2333 -> 8, 2500 -> 9, 2667 -> 10, 2833 -> 11) + }: _*), // speed = 6 + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((12 until 20).map(v => 3000 + (v - 12) * 125 -> v): _*), // speed = 8 + AdvanceRateManualClock(seconds = 1), + // Now we should reach full speed + CheckLastBatch((20 until 30).map(v => 4000 + (v - 20) * 100 -> v): _*), // speed = 10 + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((30 until 40).map(v => 5000 + (v - 30) * 100 -> v): _*), // speed = 10 + AdvanceRateManualClock(seconds = 1), + CheckLastBatch((40 until 50).map(v => 6000 + (v - 40) * 100 -> v): _*) // speed = 10 + ) + } + + test("numPartitions") { + val input = spark.readStream + .format("rate") + .option("rowsPerSecond", "10") + .option("numPartitions", "6") + .option("useManualClock", "true") + .load() + .select(spark_partition_id()) + .distinct() + testStream(input)( + AdvanceRateManualClock(1), + CheckLastBatch((0 until 6): _*) + ) + } + + testQuietly("overflow") { + val input = spark.readStream + .format("rate") + .option("rowsPerSecond", Long.MaxValue.toString) + .option("useManualClock", "true") + .load() + .select(spark_partition_id()) + .distinct() + testStream(input)( + AdvanceRateManualClock(2), + ExpectFailure[ArithmeticException](t => { + Seq("overflow", "rowsPerSecond").foreach { msg => + assert(t.getMessage.contains(msg)) + } + }) + ) + } + + testQuietly("illegal option values") { + def testIllegalOptionValue( + option: String, + value: String, + expectedMessages: Seq[String]): Unit = { + val e = intercept[StreamingQueryException] { + spark.readStream + .format("rate") + .option(option, value) + .load() + .writeStream + .format("console") + .start() + .awaitTermination() + } + assert(e.getCause.isInstanceOf[IllegalArgumentException]) + for (msg <- expectedMessages) { + assert(e.getCause.getMessage.contains(msg)) + } + } + + testIllegalOptionValue("rowsPerSecond", "-1", Seq("-1", "rowsPerSecond", "positive")) + testIllegalOptionValue("numPartitions", "-1", Seq("-1", "numPartitions", "positive")) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 5bc36dd30f6d1..2a4039cc5831a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -172,8 +172,10 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { * * @param isFatalError if this is a fatal error. If so, the error should also be caught by * UncaughtExceptionHandler. + * @param assertFailure a function to verify the error. 
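+   *                      (It is passed the cause of the captured query failure; defaults to a no-op.)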
*/ case class ExpectFailure[T <: Throwable : ClassTag]( + assertFailure: Throwable => Unit = _ => {}, isFatalError: Boolean = false) extends StreamAction { val causeClass: Class[T] = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] override def toString(): String = @@ -455,6 +457,7 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts { s"\tExpected: ${ef.causeClass}\n\tReturned: $streamThreadDeathCause") streamThreadDeathCause = null } + ef.assertFailure(exception.getCause) } catch { case _: InterruptedException => case e: org.scalatest.exceptions.TestFailedDueToTimeoutException => From 53212c32bc39dd86a2bb0f9c5a1f747591e545a0 Mon Sep 17 00:00:00 2001 From: jerryshao Date: Wed, 14 Jun 2017 08:12:15 +0800 Subject: [PATCH 0993/1204] [SPARK-12552][CORE] Correctly count the driver resource when recovering from failure for Master Currently in Standalone HA mode, the resource usage of driver is not correctly counted in Master when recovering from failure, this will lead to some unexpected behaviors like negative value in UI. So here fix this to also count the driver's resource usage. Also changing the recovered app's state to `RUNNING` when fully recovered. Previously it will always be WAITING even fully recovered. andrewor14 please help to review, thanks a lot. Author: jerryshao Closes #10506 from jerryshao/SPARK-12552. (cherry picked from commit 9eb095243b0bd8a2627db6dc36daa4561ef3c08b) Signed-off-by: Wenchen Fan --- .../apache/spark/deploy/master/Master.scala | 5 +- .../spark/deploy/master/MasterSuite.scala | 130 +++++++++++++++++- 2 files changed, 131 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 96b53c624232e..cbc5aae0b334c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -367,7 +367,7 @@ private[deploy] class Master( drivers.find(_.id == driverId).foreach { driver => driver.worker = Some(worker) driver.state = DriverState.RUNNING - worker.drivers(driverId) = driver + worker.addDriver(driver) } } case None => @@ -547,6 +547,9 @@ private[deploy] class Master( workers.filter(_.state == WorkerState.UNKNOWN).foreach(removeWorker) apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication) + // Update the state of recovered apps to RUNNING + apps.filter(_.state == ApplicationState.WAITING).foreach(_.state = ApplicationState.RUNNING) + // Reschedule drivers which were not claimed by any workers drivers.filter(_.worker.isEmpty).foreach { d => logWarning(s"Driver ${d.id} was not found after master recovery") diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 539264652d7d5..4f432e4cf21c7 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -21,12 +21,15 @@ import java.util.Date import java.util.concurrent.ConcurrentLinkedQueue import scala.collection.JavaConverters._ +import scala.collection.mutable.{HashMap, HashSet} import scala.concurrent.duration._ import scala.io.Source import scala.language.postfixOps +import scala.reflect.ClassTag import org.json4s._ import org.json4s.jackson.JsonMethods._ +import org.mockito.Mockito.{mock, when} import org.scalatest.{BeforeAndAfter, Matchers, PrivateMethodTester} import 
org.scalatest.concurrent.Eventually import other.supplier.{CustomPersistenceEngine, CustomRecoveryModeFactory} @@ -34,7 +37,8 @@ import other.supplier.{CustomPersistenceEngine, CustomRecoveryModeFactory} import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy._ import org.apache.spark.deploy.DeployMessages._ -import org.apache.spark.rpc.{RpcAddress, RpcEndpoint, RpcEnv} +import org.apache.spark.rpc.{RpcAddress, RpcEndpoint, RpcEndpointRef, RpcEnv} +import org.apache.spark.serializer class MasterSuite extends SparkFunSuite with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter { @@ -134,6 +138,81 @@ class MasterSuite extends SparkFunSuite CustomRecoveryModeFactory.instantiationAttempts should be > instantiationAttempts } + test("master correctly recover the application") { + val conf = new SparkConf(loadDefaults = false) + conf.set("spark.deploy.recoveryMode", "CUSTOM") + conf.set("spark.deploy.recoveryMode.factory", + classOf[FakeRecoveryModeFactory].getCanonicalName) + conf.set("spark.master.rest.enabled", "false") + + val fakeAppInfo = makeAppInfo(1024) + val fakeWorkerInfo = makeWorkerInfo(8192, 16) + val fakeDriverInfo = new DriverInfo( + startTime = 0, + id = "test_driver", + desc = new DriverDescription( + jarUrl = "", + mem = 1024, + cores = 1, + supervise = false, + command = new Command("", Nil, Map.empty, Nil, Nil, Nil)), + submitDate = new Date()) + + // Build the fake recovery data + FakeRecoveryModeFactory.persistentData.put(s"app_${fakeAppInfo.id}", fakeAppInfo) + FakeRecoveryModeFactory.persistentData.put(s"driver_${fakeDriverInfo.id}", fakeDriverInfo) + FakeRecoveryModeFactory.persistentData.put(s"worker_${fakeWorkerInfo.id}", fakeWorkerInfo) + + var master: Master = null + try { + master = makeMaster(conf) + master.rpcEnv.setupEndpoint(Master.ENDPOINT_NAME, master) + // Wait until Master recover from checkpoint data. + eventually(timeout(5 seconds), interval(100 milliseconds)) { + master.idToApp.size should be(1) + } + + master.idToApp.keySet should be(Set(fakeAppInfo.id)) + getDrivers(master) should be(Set(fakeDriverInfo)) + master.workers should be(Set(fakeWorkerInfo)) + + // Notify Master about the executor and driver info to make it correctly recovered. + val fakeExecutors = List( + new ExecutorDescription(fakeAppInfo.id, 0, 8, ExecutorState.RUNNING), + new ExecutorDescription(fakeAppInfo.id, 0, 7, ExecutorState.RUNNING)) + + fakeAppInfo.state should be(ApplicationState.UNKNOWN) + fakeWorkerInfo.coresFree should be(16) + fakeWorkerInfo.coresUsed should be(0) + + master.self.send(MasterChangeAcknowledged(fakeAppInfo.id)) + eventually(timeout(1 second), interval(10 milliseconds)) { + // Application state should be WAITING when "MasterChangeAcknowledged" event executed. 
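+        // The app only moves to RUNNING once recovery completes (completeRecovery), driven by the steps below.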
+ fakeAppInfo.state should be(ApplicationState.WAITING) + } + + master.self.send( + WorkerSchedulerStateResponse(fakeWorkerInfo.id, fakeExecutors, Seq(fakeDriverInfo.id))) + + eventually(timeout(5 seconds), interval(100 milliseconds)) { + getState(master) should be(RecoveryState.ALIVE) + } + + // If driver's resource is also counted, free cores should 0 + fakeWorkerInfo.coresFree should be(0) + fakeWorkerInfo.coresUsed should be(16) + // State of application should be RUNNING + fakeAppInfo.state should be(ApplicationState.RUNNING) + } finally { + if (master != null) { + master.rpcEnv.shutdown() + master.rpcEnv.awaitTermination() + master = null + FakeRecoveryModeFactory.persistentData.clear() + } + } + } + test("master/worker web ui available") { implicit val formats = org.json4s.DefaultFormats val conf = new SparkConf() @@ -394,6 +473,9 @@ class MasterSuite extends SparkFunSuite // ========================================== private val _scheduleExecutorsOnWorkers = PrivateMethod[Array[Int]]('scheduleExecutorsOnWorkers) + private val _drivers = PrivateMethod[HashSet[DriverInfo]]('drivers) + private val _state = PrivateMethod[RecoveryState.Value]('state) + private val workerInfo = makeWorkerInfo(4096, 10) private val workerInfos = Array(workerInfo, workerInfo, workerInfo) @@ -412,12 +494,18 @@ class MasterSuite extends SparkFunSuite val desc = new ApplicationDescription( "test", maxCores, memoryPerExecutorMb, null, "", None, None, coresPerExecutor) val appId = System.currentTimeMillis.toString - new ApplicationInfo(0, appId, desc, new Date, null, Int.MaxValue) + val endpointRef = mock(classOf[RpcEndpointRef]) + val mockAddress = mock(classOf[RpcAddress]) + when(endpointRef.address).thenReturn(mockAddress) + new ApplicationInfo(0, appId, desc, new Date, endpointRef, Int.MaxValue) } private def makeWorkerInfo(memoryMb: Int, cores: Int): WorkerInfo = { val workerId = System.currentTimeMillis.toString - new WorkerInfo(workerId, "host", 100, cores, memoryMb, null, "http://localhost:80") + val endpointRef = mock(classOf[RpcEndpointRef]) + val mockAddress = mock(classOf[RpcAddress]) + when(endpointRef.address).thenReturn(mockAddress) + new WorkerInfo(workerId, "host", 100, cores, memoryMb, endpointRef, "http://localhost:80") } private def scheduleExecutorsOnWorkers( @@ -499,4 +587,40 @@ class MasterSuite extends SparkFunSuite assert(receivedMasterAddress === RpcAddress("localhost2", 10000)) } } + + private def getDrivers(master: Master): HashSet[DriverInfo] = { + master.invokePrivate(_drivers()) + } + + private def getState(master: Master): RecoveryState.Value = { + master.invokePrivate(_state()) + } +} + +private class FakeRecoveryModeFactory(conf: SparkConf, ser: serializer.Serializer) + extends StandaloneRecoveryModeFactory(conf, ser) { + import FakeRecoveryModeFactory.persistentData + + override def createPersistenceEngine(): PersistenceEngine = new PersistenceEngine { + + override def unpersist(name: String): Unit = { + persistentData.remove(name) + } + + override def persist(name: String, obj: Object): Unit = { + persistentData(name) = obj + } + + override def read[T: ClassTag](prefix: String): Seq[T] = { + persistentData.filter(_._1.startsWith(prefix)).map(_._2.asInstanceOf[T]).toSeq + } + } + + override def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent = { + new MonarchyLeaderAgent(master) + } +} + +private object FakeRecoveryModeFactory { + val persistentData = new HashMap[String, Object]() } From 42cc830825e23addf891a1d1e9b14f0f588dd0a4 Mon Sep 17 00:00:00 2001 
From: lianhuiwang Date: Wed, 14 Jun 2017 09:57:56 +0800 Subject: [PATCH 0994/1204] [SPARK-20986][SQL] Reset table's statistics after PruneFileSourcePartitions rule. ## What changes were proposed in this pull request? After PruneFileSourcePartitions rule, It needs reset table's statistics because PruneFileSourcePartitions can filter some unnecessary partitions. So the statistics need to be changed. ## How was this patch tested? add unit test. Author: lianhuiwang Closes #18205 from lianhuiwang/SPARK-20986. (cherry picked from commit 8b5b2e272f48f7ddf8aeece0205cb4a5853c364e) Signed-off-by: Wenchen Fan --- .../PruneFileSourcePartitions.scala | 8 ++++-- .../PruneFileSourcePartitionsSuite.scala | 25 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala index 905b8683e10bd..f5df1848a38c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.datasources +import org.apache.spark.sql.catalyst.catalog.CatalogStatistics import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} @@ -59,8 +60,11 @@ private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = fsRelation.copy(location = prunedFileIndex)(sparkSession) - val prunedLogicalRelation = logicalRelation.copy(relation = prunedFsRelation) - + // Change table stats based on the sizeInBytes of pruned files + val withStats = logicalRelation.catalogTable.map(_.copy( + stats = Some(CatalogStatistics(sizeInBytes = BigInt(prunedFileIndex.sizeInBytes))))) + val prunedLogicalRelation = logicalRelation.copy( + relation = prunedFsRelation, catalogTable = withStats) // Keep partition-pruning predicates so that they are visible in physical planning val filterExpression = filters.reduceLeft(And) val filter = Filter(filterExpression, prunedLogicalRelation) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala index f818e29555468..d91f25a4da013 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} @@ -66,4 +67,28 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te } } } + + test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { + withTable("tbl") { + spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") + sql(s"ANALYZE TABLE tbl 
COMPUTE STATISTICS") + val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats + assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") + + val df = sql("SELECT * FROM tbl WHERE p = 1") + val sizes1 = df.queryExecution.analyzed.collect { + case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes + } + assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") + assert(sizes1(0) == tableStats.get.sizeInBytes) + + val relations = df.queryExecution.optimizedPlan.collect { + case relation: LogicalRelation => relation + } + assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") + val size2 = relations(0).computeStats(conf).sizeInBytes + assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) + assert(size2 < tableStats.get.sizeInBytes) + } + } } From 9bdc83590922e0e1f22424904411acb0d1b37a11 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 14 Jun 2017 16:28:06 +0800 Subject: [PATCH 0995/1204] [SPARK-21085][SQL] Failed to read the partitioned table created by Spark 2.1 ### What changes were proposed in this pull request? Before the PR, Spark is unable to read the partitioned table created by Spark 2.1 when the table schema does not put the partitioning column at the end of the schema. [assert(partitionFields.map(_.name) == partitionColumnNames)](https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala#L234-L236) When reading the table metadata from the metastore, we also need to reorder the columns. ### How was this patch tested? Added test cases to check both Hive-serde and data source tables. Author: gatorsmile Closes #18295 from gatorsmile/reorderReadSchema. (cherry picked from commit 0c88e8d37224713199ca5661c2cd57f5918dcb9a) Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveExternalCatalog.scala | 31 ++++++++++++++++--- .../sql/hive/HiveExternalCatalogSuite.scala | 26 ++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index ba48facff2933..a03beb72b520c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -717,6 +717,20 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat properties = table.properties.filterNot { case (key, _) => key.startsWith(SPARK_SQL_PREFIX) }) } + // Reorder table schema to put partition columns at the end. Before Spark 2.2, the partition + // columns are not put at the end of schema. We need to reorder it when reading the schema + // from the table properties. + private def reorderSchema(schema: StructType, partColumnNames: Seq[String]): StructType = { + val partitionFields = partColumnNames.map { partCol => + schema.find(_.name == partCol).getOrElse { + throw new AnalysisException("The metadata is corrupted. Unable to find the " + + s"partition column names from the schema. schema: ${schema.catalogString}. 
" + + s"Partition columns: ${partColumnNames.mkString("[", ", ", "]")}") + } + } + StructType(schema.filterNot(partitionFields.contains) ++ partitionFields) + } + private def restoreHiveSerdeTable(table: CatalogTable): CatalogTable = { val hiveTable = table.copy( provider = Some(DDLUtils.HIVE_PROVIDER), @@ -726,10 +740,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // schema from table properties. if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) { val schemaFromTableProps = getSchemaFromTableProperties(table) - if (DataType.equalsIgnoreCaseAndNullability(schemaFromTableProps, table.schema)) { + val partColumnNames = getPartitionColumnsFromTableProperties(table) + val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) + + if (DataType.equalsIgnoreCaseAndNullability(reorderedSchema, table.schema)) { hiveTable.copy( - schema = schemaFromTableProps, - partitionColumnNames = getPartitionColumnsFromTableProperties(table), + schema = reorderedSchema, + partitionColumnNames = partColumnNames, bucketSpec = getBucketSpecFromTableProperties(table)) } else { // Hive metastore may change the table schema, e.g. schema inference. If the table @@ -759,11 +776,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat } val partitionProvider = table.properties.get(TABLE_PARTITION_PROVIDER) + val schemaFromTableProps = getSchemaFromTableProperties(table) + val partColumnNames = getPartitionColumnsFromTableProperties(table) + val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) + table.copy( provider = Some(provider), storage = storageWithLocation, - schema = getSchemaFromTableProperties(table), - partitionColumnNames = getPartitionColumnsFromTableProperties(table), + schema = reorderedSchema, + partitionColumnNames = partColumnNames, bucketSpec = getBucketSpecFromTableProperties(table), tracksPartitionsInCatalog = partitionProvider == Some(TABLE_PARTITION_PROVIDER_CATALOG)) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index bd54c043c6ec4..d43534d5914d1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -63,4 +63,30 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite { assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER)) assert(DDLUtils.isHiveTable(externalCatalog.getTable("db1", "hive_tbl"))) } + + Seq("parquet", "hive").foreach { format => + test(s"Partition columns should be put at the end of table schema for the format $format") { + val catalog = newBasicCatalog() + val newSchema = new StructType() + .add("col1", "int") + .add("col2", "string") + .add("partCol1", "int") + .add("partCol2", "string") + val table = CatalogTable( + identifier = TableIdentifier("tbl", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty, + schema = new StructType() + .add("col1", "int") + .add("partCol1", "int") + .add("partCol2", "string") + .add("col2", "string"), + provider = Some(format), + partitionColumnNames = Seq("partCol1", "partCol2")) + catalog.createTable(table, ignoreIfExists = false) + + val restoredTable = externalCatalog.getTable("db1", "tbl") + assert(restoredTable.schema == newSchema) + } + } } From 
626511953b87747e933e4f64b9fcd4c4776a5c4e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 14 Jun 2017 19:18:28 +0800 Subject: [PATCH 0996/1204] [SPARK-20211][SQL][BACKPORT-2.2] Fix the Precision and Scale of Decimal Values when the Input is BigDecimal between -1.0 and 1.0 ### What changes were proposed in this pull request? This PR is to backport https://github.com/apache/spark/pull/18244 to 2.2 --- The precision and scale of decimal values are wrong when the input is BigDecimal between -1.0 and 1.0. The BigDecimal's precision is the digit count starts from the leftmost nonzero digit based on the [JAVA's BigDecimal definition](https://docs.oracle.com/javase/7/docs/api/java/math/BigDecimal.html). However, our Decimal decision follows the database decimal standard, which is the total number of digits, including both to the left and the right of the decimal point. Thus, this PR is to fix the issue by doing the conversion. Before this PR, the following queries failed: ```SQL select 1 > 0.0001 select floor(0.0001) select ceil(0.0001) ``` ### How was this patch tested? Added test cases. Author: gatorsmile Closes #18297 from gatorsmile/backport18244. --- .../org/apache/spark/sql/types/Decimal.scala | 10 +- .../apache/spark/sql/types/DecimalSuite.scala | 10 ++ .../resources/sql-tests/inputs/arithmetic.sql | 24 ++++ .../sql-tests/results/arithmetic.sql.out | 134 +++++++++++++++++- 4 files changed, 176 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 80916ee9c5379..1f1fb51addfd8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -126,7 +126,15 @@ final class Decimal extends Ordered[Decimal] with Serializable { def set(decimal: BigDecimal): Decimal = { this.decimalVal = decimal this.longVal = 0L - this._precision = decimal.precision + if (decimal.precision <= decimal.scale) { + // For Decimal, we expect the precision is equal to or large than the scale, however, + // in BigDecimal, the digit count starts from the leftmost nonzero digit of the exact + // result. For example, the precision of 0.01 equals to 1 based on the definition, but + // the scale is 2. The expected precision should be 3. 
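+      // e.g. Decimal(BigDecimal("0.09")) should report precision 3 and scale 2 (covered by the
+      // new cases added to DecimalSuite below).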
+ this._precision = decimal.scale + 1 + } else { + this._precision = decimal.precision + } this._scale = decimal.scale this } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 93c231e30b49b..144f3d688d402 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -32,6 +32,16 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester { test("creating decimals") { checkDecimal(new Decimal(), "0", 1, 0) + checkDecimal(Decimal(BigDecimal("0.09")), "0.09", 3, 2) + checkDecimal(Decimal(BigDecimal("0.9")), "0.9", 2, 1) + checkDecimal(Decimal(BigDecimal("0.90")), "0.90", 3, 2) + checkDecimal(Decimal(BigDecimal("0.0")), "0.0", 2, 1) + checkDecimal(Decimal(BigDecimal("0")), "0", 1, 0) + checkDecimal(Decimal(BigDecimal("1.0")), "1.0", 2, 1) + checkDecimal(Decimal(BigDecimal("-0.09")), "-0.09", 3, 2) + checkDecimal(Decimal(BigDecimal("-0.9")), "-0.9", 2, 1) + checkDecimal(Decimal(BigDecimal("-0.90")), "-0.90", 3, 2) + checkDecimal(Decimal(BigDecimal("-1.0")), "-1.0", 2, 1) checkDecimal(Decimal(BigDecimal("10.030")), "10.030", 5, 3) checkDecimal(Decimal(BigDecimal("10.030"), 4, 1), "10.0", 4, 1) checkDecimal(Decimal(BigDecimal("-9.95"), 4, 1), "-10.0", 4, 1) diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql index f62b10ca0037b..492a405d7ebbd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql @@ -32,3 +32,27 @@ select 1 - 2; select 2 * 5; select 5 % 3; select pmod(-7, 3); + +-- math functions +select cot(1); +select cot(null); +select cot(0); +select cot(-1); + +-- ceil and ceiling +select ceiling(0); +select ceiling(1); +select ceil(1234567890123456); +select ceiling(1234567890123456); +select ceil(0.01); +select ceiling(-0.10); + +-- floor +select floor(0); +select floor(1); +select floor(1234567890123456); +select floor(0.01); +select floor(-0.10); + +-- comparison operator +select 1 > 0.00001 \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index ce42c016a7100..3811cd2c30986 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 +-- Number of queries: 44 -- !query 0 @@ -224,3 +224,135 @@ select pmod(-7, 3) struct -- !query 27 output 2 + + +-- !query 28 +select cot(1) +-- !query 28 schema +struct<> +-- !query 28 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 29 +select cot(null) +-- !query 29 schema +struct<> +-- !query 29 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 30 +select cot(0) +-- !query 30 schema +struct<> +-- !query 30 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. 
This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 31 +select cot(-1) +-- !query 31 schema +struct<> +-- !query 31 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 32 +select ceiling(0) +-- !query 32 schema +struct +-- !query 32 output +0 + + +-- !query 33 +select ceiling(1) +-- !query 33 schema +struct +-- !query 33 output +1 + + +-- !query 34 +select ceil(1234567890123456) +-- !query 34 schema +struct +-- !query 34 output +1234567890123456 + + +-- !query 35 +select ceiling(1234567890123456) +-- !query 35 schema +struct +-- !query 35 output +1234567890123456 + + +-- !query 36 +select ceil(0.01) +-- !query 36 schema +struct +-- !query 36 output +1 + + +-- !query 37 +select ceiling(-0.10) +-- !query 37 schema +struct +-- !query 37 output +0 + + +-- !query 38 +select floor(0) +-- !query 38 schema +struct +-- !query 38 output +0 + + +-- !query 39 +select floor(1) +-- !query 39 schema +struct +-- !query 39 output +1 + + +-- !query 40 +select floor(1234567890123456) +-- !query 40 schema +struct +-- !query 40 output +1234567890123456 + + +-- !query 41 +select floor(0.01) +-- !query 41 schema +struct +-- !query 41 output +0 + + +-- !query 42 +select floor(-0.10) +-- !query 42 schema +struct +-- !query 42 output +-1 + + +-- !query 43 +select 1 > 0.00001 +-- !query 43 schema +struct<(CAST(1 AS BIGINT) > 0):boolean> +-- !query 43 output +true From a890466bcc600941927c7040aee409d82b7587d6 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 14 Jun 2017 19:18:28 +0800 Subject: [PATCH 0997/1204] [SPARK-20211][SQL][BACKPORT-2.2] Fix the Precision and Scale of Decimal Values when the Input is BigDecimal between -1.0 and 1.0 ### What changes were proposed in this pull request? This PR is to backport https://github.com/apache/spark/pull/18244 to 2.2 --- The precision and scale of decimal values are wrong when the input is BigDecimal between -1.0 and 1.0. The BigDecimal's precision is the digit count starts from the leftmost nonzero digit based on the [JAVA's BigDecimal definition](https://docs.oracle.com/javase/7/docs/api/java/math/BigDecimal.html). However, our Decimal decision follows the database decimal standard, which is the total number of digits, including both to the left and the right of the decimal point. Thus, this PR is to fix the issue by doing the conversion. Before this PR, the following queries failed: ```SQL select 1 > 0.0001 select floor(0.0001) select ceil(0.0001) ``` ### How was this patch tested? Added test cases. Author: gatorsmile Closes #18297 from gatorsmile/backport18244. 
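For illustration, once the precision is adjusted these queries should simply evaluate as expected:
```SQL
select 1 > 0.0001;     -- true
select floor(0.0001);  -- 0
select ceil(0.0001);   -- 1
```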
(cherry picked from commit 626511953b87747e933e4f64b9fcd4c4776a5c4e) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/types/Decimal.scala | 10 +- .../apache/spark/sql/types/DecimalSuite.scala | 10 ++ .../resources/sql-tests/inputs/arithmetic.sql | 24 ++++ .../sql-tests/results/arithmetic.sql.out | 134 +++++++++++++++++- 4 files changed, 176 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 465fb83669a76..1807fd669af73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -125,7 +125,15 @@ final class Decimal extends Ordered[Decimal] with Serializable { def set(decimal: BigDecimal): Decimal = { this.decimalVal = decimal this.longVal = 0L - this._precision = decimal.precision + if (decimal.precision <= decimal.scale) { + // For Decimal, we expect the precision is equal to or large than the scale, however, + // in BigDecimal, the digit count starts from the leftmost nonzero digit of the exact + // result. For example, the precision of 0.01 equals to 1 based on the definition, but + // the scale is 2. The expected precision should be 3. + this._precision = decimal.scale + 1 + } else { + this._precision = decimal.precision + } this._scale = decimal.scale this } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 52d0692524d0f..6a71aca8f69a8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -32,6 +32,16 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester { test("creating decimals") { checkDecimal(new Decimal(), "0", 1, 0) + checkDecimal(Decimal(BigDecimal("0.09")), "0.09", 3, 2) + checkDecimal(Decimal(BigDecimal("0.9")), "0.9", 2, 1) + checkDecimal(Decimal(BigDecimal("0.90")), "0.90", 3, 2) + checkDecimal(Decimal(BigDecimal("0.0")), "0.0", 2, 1) + checkDecimal(Decimal(BigDecimal("0")), "0", 1, 0) + checkDecimal(Decimal(BigDecimal("1.0")), "1.0", 2, 1) + checkDecimal(Decimal(BigDecimal("-0.09")), "-0.09", 3, 2) + checkDecimal(Decimal(BigDecimal("-0.9")), "-0.9", 2, 1) + checkDecimal(Decimal(BigDecimal("-0.90")), "-0.90", 3, 2) + checkDecimal(Decimal(BigDecimal("-1.0")), "-1.0", 2, 1) checkDecimal(Decimal(BigDecimal("10.030")), "10.030", 5, 3) checkDecimal(Decimal(BigDecimal("10.030"), 4, 1), "10.0", 4, 1) checkDecimal(Decimal(BigDecimal("-9.95"), 4, 1), "-10.0", 4, 1) diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql index f62b10ca0037b..492a405d7ebbd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql @@ -32,3 +32,27 @@ select 1 - 2; select 2 * 5; select 5 % 3; select pmod(-7, 3); + +-- math functions +select cot(1); +select cot(null); +select cot(0); +select cot(-1); + +-- ceil and ceiling +select ceiling(0); +select ceiling(1); +select ceil(1234567890123456); +select ceiling(1234567890123456); +select ceil(0.01); +select ceiling(-0.10); + +-- floor +select floor(0); +select floor(1); +select floor(1234567890123456); +select floor(0.01); +select floor(-0.10); + +-- comparison operator +select 1 > 0.00001 \ No newline at end of 
file diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index ce42c016a7100..3811cd2c30986 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 +-- Number of queries: 44 -- !query 0 @@ -224,3 +224,135 @@ select pmod(-7, 3) struct -- !query 27 output 2 + + +-- !query 28 +select cot(1) +-- !query 28 schema +struct<> +-- !query 28 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 29 +select cot(null) +-- !query 29 schema +struct<> +-- !query 29 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 30 +select cot(0) +-- !query 30 schema +struct<> +-- !query 30 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 31 +select cot(-1) +-- !query 31 schema +struct<> +-- !query 31 output +org.apache.spark.sql.AnalysisException +Undefined function: 'cot'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query 32 +select ceiling(0) +-- !query 32 schema +struct +-- !query 32 output +0 + + +-- !query 33 +select ceiling(1) +-- !query 33 schema +struct +-- !query 33 output +1 + + +-- !query 34 +select ceil(1234567890123456) +-- !query 34 schema +struct +-- !query 34 output +1234567890123456 + + +-- !query 35 +select ceiling(1234567890123456) +-- !query 35 schema +struct +-- !query 35 output +1234567890123456 + + +-- !query 36 +select ceil(0.01) +-- !query 36 schema +struct +-- !query 36 output +1 + + +-- !query 37 +select ceiling(-0.10) +-- !query 37 schema +struct +-- !query 37 output +0 + + +-- !query 38 +select floor(0) +-- !query 38 schema +struct +-- !query 38 output +0 + + +-- !query 39 +select floor(1) +-- !query 39 schema +struct +-- !query 39 output +1 + + +-- !query 40 +select floor(1234567890123456) +-- !query 40 schema +struct +-- !query 40 output +1234567890123456 + + +-- !query 41 +select floor(0.01) +-- !query 41 schema +struct +-- !query 41 output +0 + + +-- !query 42 +select floor(-0.10) +-- !query 42 schema +struct +-- !query 42 output +-1 + + +-- !query 43 +select 1 > 0.00001 +-- !query 43 schema +struct<(CAST(1 AS BIGINT) > 0):boolean> +-- !query 43 output +true From 3dda682c43daa5eb016d1c02a23cc40a443a6523 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Wed, 14 Jun 2017 11:13:16 -0700 Subject: [PATCH 0998/1204] [SPARK-21089][SQL] Fix DESC EXTENDED/FORMATTED to Show Table Properties Since both table properties and storage properties share the same key values, table properties are not shown in the output of DESC EXTENDED/FORMATTED when the storage properties are not empty. This PR is to fix the above issue by renaming them to different keys. Added test cases. Author: Xiao Li Closes #18294 from gatorsmile/tableProperties. 
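A minimal scenario that exercises this (mirroring the updated describe.sql test):
```SQL
CREATE TABLE t (a STRING, b INT) USING parquet OPTIONS (a '1', b '2');
ALTER TABLE t SET TBLPROPERTIES (e = '3');
DESC FORMATTED t;
-- With this patch the output lists both `Storage Properties [a=1, b=2]` and `Table Properties [e=3]`
```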
(cherry picked from commit df766a471426625fe86c8845f6261e0fc087772d) Signed-off-by: Xiao Li --- .../sql/catalyst/catalog/interface.scala | 4 +- .../resources/sql-tests/inputs/describe.sql | 3 + .../sql-tests/results/describe.sql.out | 183 ++++++++++-------- 3 files changed, 104 insertions(+), 86 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index cc0cbba275b81..976d78749bfdc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -75,7 +75,7 @@ case class CatalogStorageFormat( CatalogUtils.maskCredentials(properties) match { case props if props.isEmpty => // No-op case props => - map.put("Properties", props.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")) + map.put("Storage Properties", props.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")) } map } @@ -313,7 +313,7 @@ case class CatalogTable( } } - if (properties.nonEmpty) map.put("Properties", tableProperties) + if (properties.nonEmpty) map.put("Table Properties", tableProperties) stats.foreach(s => map.put("Statistics", s.simpleString)) map ++= storage.toLinkedHashMap if (tracksPartitionsInCatalog) map.put("Partition Provider", "Catalog") diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql index 6de4cf0d5afa1..91b966829f8fb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql @@ -1,4 +1,5 @@ CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet + OPTIONS (a '1', b '2') PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS COMMENT 'table_comment'; @@ -13,6 +14,8 @@ CREATE TEMPORARY VIEW temp_Data_Source_View CREATE VIEW v AS SELECT * FROM t; +ALTER TABLE t SET TBLPROPERTIES (e = '3'); + ALTER TABLE t ADD PARTITION (c='Us', d=1); DESCRIBE t; diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 46d32bbc52247..329532cd7c842 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -1,9 +1,10 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 31 +-- Number of queries: 32 -- !query 0 CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet + OPTIONS (a '1', b '2') PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS COMMENT 'table_comment' -- !query 0 schema @@ -42,7 +43,7 @@ struct<> -- !query 4 -ALTER TABLE t ADD PARTITION (c='Us', d=1) +ALTER TABLE t SET TBLPROPERTIES (e = '3') -- !query 4 schema struct<> -- !query 4 output @@ -50,10 +51,18 @@ struct<> -- !query 5 -DESCRIBE t +ALTER TABLE t ADD PARTITION (c='Us', d=1) -- !query 5 schema -struct +struct<> -- !query 5 output + + + +-- !query 6 +DESCRIBE t +-- !query 6 schema +struct +-- !query 6 output a string b int c string @@ -64,11 +73,11 @@ c string d string --- !query 6 +-- !query 7 DESC default.t --- !query 6 schema +-- !query 7 schema struct --- !query 6 output +-- !query 7 output a string b int c string @@ -79,11 +88,11 @@ c string d string --- !query 7 +-- !query 8 DESC TABLE t --- !query 7 schema +-- !query 8 schema struct --- !query 7 output +-- !query 8 output a string b int c 
string @@ -94,11 +103,11 @@ c string d string --- !query 8 +-- !query 9 DESC FORMATTED t --- !query 8 schema +-- !query 9 schema struct --- !query 8 output +-- !query 9 output a string b int c string @@ -118,16 +127,17 @@ Provider parquet Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] -Comment table_comment +Table Properties [e=3] Location [not included in comparison]sql/core/spark-warehouse/t +Storage Properties [a=1, b=2] Partition Provider Catalog --- !query 9 +-- !query 10 DESC EXTENDED t --- !query 9 schema +-- !query 10 schema struct --- !query 9 output +-- !query 10 output a string b int c string @@ -147,16 +157,17 @@ Provider parquet Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] -Comment table_comment +Table Properties [e=3] Location [not included in comparison]sql/core/spark-warehouse/t +Storage Properties [a=1, b=2] Partition Provider Catalog --- !query 10 +-- !query 11 DESC t PARTITION (c='Us', d=1) --- !query 10 schema +-- !query 11 schema struct --- !query 10 output +-- !query 11 output a string b int c string @@ -167,11 +178,11 @@ c string d string --- !query 11 +-- !query 12 DESC EXTENDED t PARTITION (c='Us', d=1) --- !query 11 schema +-- !query 12 schema struct --- !query 11 output +-- !query 12 output a string b int c string @@ -186,19 +197,21 @@ Database default Table t Partition Values [c=Us, d=1] Location [not included in comparison]sql/core/spark-warehouse/t/c=Us/d=1 +Storage Properties [a=1, b=2] # Storage Information Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]sql/core/spark-warehouse/t +Storage Properties [a=1, b=2] --- !query 12 +-- !query 13 DESC FORMATTED t PARTITION (c='Us', d=1) --- !query 12 schema +-- !query 13 schema struct --- !query 12 output +-- !query 13 output a string b int c string @@ -213,39 +226,41 @@ Database default Table t Partition Values [c=Us, d=1] Location [not included in comparison]sql/core/spark-warehouse/t/c=Us/d=1 +Storage Properties [a=1, b=2] # Storage Information Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]sql/core/spark-warehouse/t +Storage Properties [a=1, b=2] --- !query 13 +-- !query 14 DESC t PARTITION (c='Us', d=2) --- !query 13 schema +-- !query 14 schema struct<> --- !query 13 output +-- !query 14 output org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 't' database 'default': c -> Us d -> 2; --- !query 14 +-- !query 15 DESC t PARTITION (c='Us') --- !query 14 schema +-- !query 15 schema struct<> --- !query 14 output +-- !query 15 output org.apache.spark.sql.AnalysisException Partition spec is invalid. 
The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`'; --- !query 15 +-- !query 16 DESC t PARTITION (c='Us', d) --- !query 15 schema +-- !query 16 schema struct<> --- !query 15 output +-- !query 16 output org.apache.spark.sql.catalyst.parser.ParseException PARTITION specification is incomplete: `d`(line 1, pos 0) @@ -255,19 +270,8 @@ DESC t PARTITION (c='Us', d) ^^^ --- !query 16 -DESC temp_v --- !query 16 schema -struct --- !query 16 output -a string -b int -c string -d string - - -- !query 17 -DESC TABLE temp_v +DESC temp_v -- !query 17 schema struct -- !query 17 output @@ -278,7 +282,7 @@ d string -- !query 18 -DESC FORMATTED temp_v +DESC TABLE temp_v -- !query 18 schema struct -- !query 18 output @@ -289,7 +293,7 @@ d string -- !query 19 -DESC EXTENDED temp_v +DESC FORMATTED temp_v -- !query 19 schema struct -- !query 19 output @@ -300,10 +304,21 @@ d string -- !query 20 -DESC temp_Data_Source_View +DESC EXTENDED temp_v -- !query 20 schema struct -- !query 20 output +a string +b int +c string +d string + + +-- !query 21 +DESC temp_Data_Source_View +-- !query 21 schema +struct +-- !query 21 output intType int test comment test1 stringType string dateType date @@ -322,42 +337,42 @@ arrayType array structType struct --- !query 21 +-- !query 22 DESC temp_v PARTITION (c='Us', d=1) --- !query 21 schema +-- !query 22 schema struct<> --- !query 21 output +-- !query 22 output org.apache.spark.sql.AnalysisException DESC PARTITION is not allowed on a temporary view: temp_v; --- !query 22 +-- !query 23 DESC v --- !query 22 schema +-- !query 23 schema struct --- !query 22 output +-- !query 23 output a string b int c string d string --- !query 23 +-- !query 24 DESC TABLE v --- !query 23 schema +-- !query 24 schema struct --- !query 23 output +-- !query 24 output a string b int c string d string --- !query 24 +-- !query 25 DESC FORMATTED v --- !query 24 schema +-- !query 25 schema struct --- !query 24 output +-- !query 25 output a string b int c string @@ -372,14 +387,14 @@ Type VIEW View Text SELECT * FROM t View Default Database default View Query Output Columns [a, b, c, d] -Properties [view.query.out.col.3=d, view.query.out.col.0=a, view.query.out.numCols=4, view.default.database=default, view.query.out.col.1=b, view.query.out.col.2=c] +Table Properties [view.query.out.col.3=d, view.query.out.col.0=a, view.query.out.numCols=4, view.default.database=default, view.query.out.col.1=b, view.query.out.col.2=c] --- !query 25 +-- !query 26 DESC EXTENDED v --- !query 25 schema +-- !query 26 schema struct --- !query 25 output +-- !query 26 output a string b int c string @@ -394,28 +409,20 @@ Type VIEW View Text SELECT * FROM t View Default Database default View Query Output Columns [a, b, c, d] -Properties [view.query.out.col.3=d, view.query.out.col.0=a, view.query.out.numCols=4, view.default.database=default, view.query.out.col.1=b, view.query.out.col.2=c] - - --- !query 26 -DESC v PARTITION (c='Us', d=1) --- !query 26 schema -struct<> --- !query 26 output -org.apache.spark.sql.AnalysisException -DESC PARTITION is not allowed on a view: v; +Table Properties [view.query.out.col.3=d, view.query.out.col.0=a, view.query.out.numCols=4, view.default.database=default, view.query.out.col.1=b, view.query.out.col.2=c] -- !query 27 -DROP TABLE t +DESC v PARTITION (c='Us', d=1) -- !query 27 schema struct<> -- !query 27 output - +org.apache.spark.sql.AnalysisException +DESC PARTITION is not allowed on a view: v; -- !query 28 -DROP VIEW temp_v +DROP TABLE t -- !query 28 schema struct<> -- 
!query 28 output @@ -423,7 +430,7 @@ struct<> -- !query 29 -DROP VIEW temp_Data_Source_View +DROP VIEW temp_v -- !query 29 schema struct<> -- !query 29 output @@ -431,8 +438,16 @@ struct<> -- !query 30 -DROP VIEW v +DROP VIEW temp_Data_Source_View -- !query 30 schema struct<> -- !query 30 output + + +-- !query 31 +DROP VIEW v +-- !query 31 schema +struct<> +-- !query 31 output + From e02e0637f43dddbdc0961cb2af85869f7ef9e12d Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Wed, 14 Jun 2017 11:35:14 -0700 Subject: [PATCH 0999/1204] Revert "[SPARK-20941][SQL] Fix SubqueryExec Reuse" This reverts commit 6a4e023b250a86887475958093f1d3bdcbb49a03. --- .../apache/spark/sql/internal/SQLConf.scala | 8 ----- .../execution/basicPhysicalOperators.scala | 3 -- .../apache/spark/sql/execution/subquery.scala | 2 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 35 ------------------- 4 files changed, 1 insertion(+), 47 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 1ea9eb5ce2173..94244dd904114 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -552,12 +552,6 @@ object SQLConf { .booleanConf .createWithDefault(true) - val SUBQUERY_REUSE_ENABLED = buildConf("spark.sql.subquery.reuse") - .internal() - .doc("When true, the planner will try to find out duplicated subqueries and re-use them.") - .booleanConf - .createWithDefault(true) - val STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT = buildConf("spark.sql.streaming.stateStore.minDeltasForSnapshot") .internal() @@ -927,8 +921,6 @@ class SQLConf extends Serializable with Logging { def exchangeReuseEnabled: Boolean = getConf(EXCHANGE_REUSE_ENABLED) - def subqueryReuseEnabled: Boolean = getConf(SUBQUERY_REUSE_ENABLED) - def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE) def constraintPropagationEnabled: Boolean = getConf(CONSTRAINT_PROPAGATION_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 04c130314388a..bd7a5c5d914c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -599,9 +599,6 @@ case class OutputFakerExec(output: Seq[Attribute], child: SparkPlan) extends Spa */ case class SubqueryExec(name: String, child: SparkPlan) extends UnaryExecNode { - // Ignore this wrapper for canonicalizing. 
- override lazy val canonicalized: SparkPlan = child.canonicalized - override lazy val metrics = Map( "dataSize" -> SQLMetrics.createMetric(sparkContext, "data size (bytes)"), "collectTime" -> SQLMetrics.createMetric(sparkContext, "time to collect (ms)")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 2abeadfe45362..d11045fb6ac8c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -156,7 +156,7 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { - if (!conf.subqueryReuseEnabled) { + if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of subqueries to avoid O(N*N) sameResult calls. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 2e831dcdff584..cd14d24370bad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -23,12 +23,9 @@ import java.net.{MalformedURLException, URL} import java.sql.Timestamp import java.util.concurrent.atomic.AtomicBoolean -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.util.StringUtils -import org.apache.spark.sql.execution.{ScalarSubquery, SubqueryExec} import org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, CartesianProductExec, SortMergeJoinExec} import org.apache.spark.sql.functions._ @@ -711,38 +708,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { row => Seq.fill(16)(Row.merge(row, row))).collect().toSeq) } - test("Verify spark.sql.subquery.reuse") { - Seq(true, false).foreach { reuse => - withSQLConf(SQLConf.SUBQUERY_REUSE_ENABLED.key -> reuse.toString) { - val df = sql( - """ - |SELECT key, (SELECT avg(key) FROM testData) - |FROM testData - |WHERE key > (SELECT avg(key) FROM testData) - |ORDER BY key - |LIMIT 3 - """.stripMargin) - - checkAnswer(df, Row(51, 50.5) :: Row(52, 50.5) :: Row(53, 50.5) :: Nil) - - val subqueries = ArrayBuffer.empty[SubqueryExec] - df.queryExecution.executedPlan.transformAllExpressions { - case s @ ScalarSubquery(plan: SubqueryExec, _) => - subqueries += plan - s - } - - assert(subqueries.size == 2, "Two ScalarSubquery are expected in the plan") - - if (reuse) { - assert(subqueries.distinct.size == 1, "Only one ScalarSubquery exists in the plan") - } else { - assert(subqueries.distinct.size == 2, "Reuse is not expected") - } - } - } - } - test("cartesian product join") { withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { checkAnswer( From af4f89c9815ddbd84ed9f3917765d26efd171483 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Thu, 15 Jun 2017 13:18:19 +0800 Subject: [PATCH 1000/1204] [SPARK-20980][SQL] Rename `wholeFile` to `multiLine` for both CSV and JSON The current option name `wholeFile` is misleading for CSV users. Currently, it is not representing a record per file. Actually, one file could have multiple records. Thus, we should rename it. Now, the proposal is `multiLine`. 
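For illustration, a minimal sketch of how the renamed option is used from the Scala API after this change; `spark` is assumed to be an existing `SparkSession` and the input paths are placeholders, not files shipped with the patch:

```scala
// Each input file may contain a single JSON record that spans multiple lines.
val jsonDF = spark.read.option("multiLine", true).json("/path/to/multi_line.json")

// CSV records whose quoted fields contain embedded newlines are handled the same way.
val csvDF = spark.read.option("multiLine", true).csv("/path/to/multi_line.csv")
```

The default stays `false` (line-delimited input), as shown by the option definitions in the diff below.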
N/A Author: Xiao Li Closes #18202 from gatorsmile/renameCVSOption. (cherry picked from commit 2051428173d8cd548702eb1a2e1c1ca76b8f2fd5) Signed-off-by: Wenchen Fan --- R/pkg/R/SQLContext.R | 6 ++--- python/pyspark/sql/readwriter.py | 14 +++++------ python/pyspark/sql/streaming.py | 14 +++++------ python/pyspark/sql/tests.py | 8 +++---- .../spark/sql/catalyst/json/JSONOptions.scala | 2 +- .../apache/spark/sql/DataFrameReader.scala | 6 ++--- .../datasources/csv/CSVDataSource.scala | 6 ++--- .../datasources/csv/CSVOptions.scala | 2 +- .../datasources/json/JsonDataSource.scala | 6 ++--- .../sql/streaming/DataStreamReader.scala | 6 ++--- .../execution/datasources/csv/CSVSuite.scala | 24 +++++++++---------- .../datasources/json/JsonSuite.scala | 14 +++++------ 12 files changed, 54 insertions(+), 54 deletions(-) diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index f5c3a749fe0a1..e3528bc7c3135 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -334,7 +334,7 @@ setMethod("toDF", signature(x = "RDD"), #' #' Loads a JSON file, returning the result as a SparkDataFrame #' By default, (\href{http://jsonlines.org/}{JSON Lines text format or newline-delimited JSON} -#' ) is supported. For JSON (one record per file), set a named property \code{wholeFile} to +#' ) is supported. For JSON (one record per file), set a named property \code{multiLine} to #' \code{TRUE}. #' It goes through the entire dataset once to determine the schema. #' @@ -348,7 +348,7 @@ setMethod("toDF", signature(x = "RDD"), #' sparkR.session() #' path <- "path/to/file.json" #' df <- read.json(path) -#' df <- read.json(path, wholeFile = TRUE) +#' df <- read.json(path, multiLine = TRUE) #' df <- jsonFile(path) #' } #' @name read.json @@ -598,7 +598,7 @@ tableToDF <- function(tableName) { #' df1 <- read.df("path/to/file.json", source = "json") #' schema <- structType(structField("name", "string"), #' structField("info", "map")) -#' df2 <- read.df(mapTypeJsonPath, "json", schema, wholeFile = TRUE) +#' df2 <- read.df(mapTypeJsonPath, "json", schema, multiLine = TRUE) #' df3 <- loadDF("data/test_table", "parquet", mergeSchema = "true") #' } #' @name read.df diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 960fb882cf901..4caad8ffc7c87 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -169,12 +169,12 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None, - wholeFile=None): + multiLine=None): """ Loads JSON files and returns the results as a :class:`DataFrame`. `JSON Lines `_ (newline-delimited JSON) is supported by default. - For JSON (one record per file), set the ``wholeFile`` parameter to ``true``. + For JSON (one record per file), set the ``multiLine`` parameter to ``true``. If the ``schema`` parameter is not specified, this function goes through the input once to determine the input schema. @@ -224,7 +224,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, formats follow the formats at ``java.text.SimpleDateFormat``. This applies to timestamp type. If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. - :param wholeFile: parse one record, which may span multiple lines, per file. 
If None is + :param multiLine: parse one record, which may span multiple lines, per file. If None is set, it uses the default value, ``false``. >>> df1 = spark.read.json('python/test_support/sql/people.json') @@ -242,7 +242,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowSingleQuotes=allowSingleQuotes, allowNumericLeadingZero=allowNumericLeadingZero, allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter, mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat, - timestampFormat=timestampFormat, wholeFile=wholeFile) + timestampFormat=timestampFormat, multiLine=multiLine) if isinstance(path, basestring): path = [path] if type(path) == list: @@ -316,7 +316,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, negativeInf=None, dateFormat=None, timestampFormat=None, maxColumns=None, maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, - columnNameOfCorruptRecord=None, wholeFile=None): + columnNameOfCorruptRecord=None, multiLine=None): """Loads a CSV file and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -389,7 +389,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``spark.sql.columnNameOfCorruptRecord``. If None is set, it uses the value specified in ``spark.sql.columnNameOfCorruptRecord``. - :param wholeFile: parse records, which may span multiple lines. If None is + :param multiLine: parse records, which may span multiple lines. If None is set, it uses the default value, ``false``. >>> df = spark.read.csv('python/test_support/sql/ages.csv') @@ -404,7 +404,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non dateFormat=dateFormat, timestampFormat=timestampFormat, maxColumns=maxColumns, maxCharsPerColumn=maxCharsPerColumn, maxMalformedLogPerPartition=maxMalformedLogPerPartition, mode=mode, - columnNameOfCorruptRecord=columnNameOfCorruptRecord, wholeFile=wholeFile) + columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine) if isinstance(path, basestring): path = [path] return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path))) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 76e8c4f47d8ad..58aa2468e006d 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -401,12 +401,12 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None, - wholeFile=None): + multiLine=None): """ Loads a JSON file stream and returns the results as a :class:`DataFrame`. `JSON Lines `_ (newline-delimited JSON) is supported by default. - For JSON (one record per file), set the ``wholeFile`` parameter to ``true``. + For JSON (one record per file), set the ``multiLine`` parameter to ``true``. If the ``schema`` parameter is not specified, this function goes through the input once to determine the input schema. @@ -458,7 +458,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, formats follow the formats at ``java.text.SimpleDateFormat``. This applies to timestamp type. 
If None is set, it uses the default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``. - :param wholeFile: parse one record, which may span multiple lines, per file. If None is + :param multiLine: parse one record, which may span multiple lines, per file. If None is set, it uses the default value, ``false``. >>> json_sdf = spark.readStream.json(tempfile.mkdtemp(), schema = sdf_schema) @@ -473,7 +473,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowSingleQuotes=allowSingleQuotes, allowNumericLeadingZero=allowNumericLeadingZero, allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter, mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat, - timestampFormat=timestampFormat, wholeFile=wholeFile) + timestampFormat=timestampFormat, multiLine=multiLine) if isinstance(path, basestring): return self._df(self._jreader.json(path)) else: @@ -532,7 +532,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, negativeInf=None, dateFormat=None, timestampFormat=None, maxColumns=None, maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, - columnNameOfCorruptRecord=None, wholeFile=None): + columnNameOfCorruptRecord=None, multiLine=None): """Loads a CSV file stream and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -607,7 +607,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``spark.sql.columnNameOfCorruptRecord``. If None is set, it uses the value specified in ``spark.sql.columnNameOfCorruptRecord``. - :param wholeFile: parse one record, which may span multiple lines. If None is + :param multiLine: parse one record, which may span multiple lines. If None is set, it uses the default value, ``false``. 
>>> csv_sdf = spark.readStream.csv(tempfile.mkdtemp(), schema = sdf_schema) @@ -624,7 +624,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non dateFormat=dateFormat, timestampFormat=timestampFormat, maxColumns=maxColumns, maxCharsPerColumn=maxCharsPerColumn, maxMalformedLogPerPartition=maxMalformedLogPerPartition, mode=mode, - columnNameOfCorruptRecord=columnNameOfCorruptRecord, wholeFile=wholeFile) + columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine) if isinstance(path, basestring): return self._df(self._jreader.csv(path)) else: diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index e06f62b35bc6f..20d9ca22d6850 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -442,15 +442,15 @@ def test_udf_with_order_by_and_limit(self): res.explain(True) self.assertEqual(res.collect(), [Row(id=0, copy=0)]) - def test_wholefile_json(self): + def test_multiLine_json(self): people1 = self.spark.read.json("python/test_support/sql/people.json") people_array = self.spark.read.json("python/test_support/sql/people_array.json", - wholeFile=True) + multiLine=True) self.assertEqual(people1.collect(), people_array.collect()) - def test_wholefile_csv(self): + def test_multiLine_csv(self): ages_newlines = self.spark.read.csv( - "python/test_support/sql/ages_newlines.csv", wholeFile=True) + "python/test_support/sql/ages_newlines.csv", multiLine=True) expected = [Row(_c0=u'Joe', _c1=u'20', _c2=u'Hi,\nI am Jeo'), Row(_c0=u'Tom', _c1=u'30', _c2=u'My name is Tom'), Row(_c0=u'Hyukjin', _c1=u'25', _c2=u'I am Hyukjin\n\nI love Spark!')] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 7930515038355..1fd680ab64b5a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -81,7 +81,7 @@ private[sql] class JSONOptions( FastDateFormat.getInstance( parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), timeZone, Locale.US) - val wholeFile = parameters.get("wholeFile").map(_.toBoolean).getOrElse(false) + val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false) /** Sets config options on a Jackson [[JsonFactory]]. */ def setJacksonOptions(factory: JsonFactory): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index c1b32917415ae..628a82fd23c13 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -283,7 +283,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * Loads JSON files and returns the results as a `DataFrame`. * * JSON Lines (newline-delimited JSON) is supported by - * default. For JSON (one record per file), set the `wholeFile` option to true. + * default. For JSON (one record per file), set the `multiLine` option to true. * * This function goes through the input once to determine the input schema. If you know the * schema in advance, use the version that specifies the schema to avoid the extra scan. @@ -323,7 +323,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
 * indicates a timestamp format. Custom date formats follow the formats at
 * `java.text.SimpleDateFormat`. This applies to timestamp type.
- * `wholeFile` (default `false`): parse one record, which may span multiple lines,
+ * `multiLine` (default `false`): parse one record, which may span multiple lines,
 * per file
 *
 *
@@ -525,7 +525,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
 * `columnNameOfCorruptRecord` (default is the value specified in
 * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
 * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.
- * `wholeFile` (default `false`): parse one record, which may span multiple lines.
+ * `multiLine` (default `false`): parse one record, which may span multiple lines.
  • * * @since 2.0.0 */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index 83bdf6fe224be..2de58384f9834 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -111,8 +111,8 @@ abstract class CSVDataSource extends Serializable { object CSVDataSource { def apply(options: CSVOptions): CSVDataSource = { - if (options.wholeFile) { - WholeFileCSVDataSource + if (options.multiLine) { + MultiLineCSVDataSource } else { TextInputCSVDataSource } @@ -196,7 +196,7 @@ object TextInputCSVDataSource extends CSVDataSource { } } -object WholeFileCSVDataSource extends CSVDataSource { +object MultiLineCSVDataSource extends CSVDataSource { override val isSplitable: Boolean = false override def readFile( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala index 78c16b75ee684..a13a5a34b4a84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVOptions.scala @@ -128,7 +128,7 @@ class CSVOptions( FastDateFormat.getInstance( parameters.getOrElse("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), timeZone, Locale.US) - val wholeFile = parameters.get("wholeFile").map(_.toBoolean).getOrElse(false) + val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false) val maxColumns = getInt("maxColumns", 20480) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala index 4f2963da9ace9..5a92a71d19e78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala @@ -86,8 +86,8 @@ abstract class JsonDataSource extends Serializable { object JsonDataSource { def apply(options: JSONOptions): JsonDataSource = { - if (options.wholeFile) { - WholeFileJsonDataSource + if (options.multiLine) { + MultiLineJsonDataSource } else { TextInputJsonDataSource } @@ -147,7 +147,7 @@ object TextInputJsonDataSource extends JsonDataSource { } } -object WholeFileJsonDataSource extends JsonDataSource { +object MultiLineJsonDataSource extends JsonDataSource { override val isSplitable: Boolean = { false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 766776230257d..7e8e6394b4862 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -163,7 +163,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * Loads a JSON file stream and returns the results as a `DataFrame`. * * JSON Lines (newline-delimited JSON) is supported by - * default. For JSON (one record per file), set the `wholeFile` option to true. + * default. For JSON (one record per file), set the `multiLine` option to true. 
* * This function goes through the input once to determine the input schema. If you know the * schema in advance, use the version that specifies the schema to avoid the extra scan. @@ -205,7 +205,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
 * indicates a timestamp format. Custom date formats follow the formats at
 * `java.text.SimpleDateFormat`. This applies to timestamp type.
- * `wholeFile` (default `false`): parse one record, which may span multiple lines,
+ * `multiLine` (default `false`): parse one record, which may span multiple lines,
 * per file
 *
 *
@@ -276,7 +276,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
 * `columnNameOfCorruptRecord` (default is the value specified in
 * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
 * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.
- * `wholeFile` (default `false`): parse one record, which may span multiple lines.
+ * `multiLine` (default `false`): parse one record, which may span multiple lines.
  • * * * @since 2.0.0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 352dba79a4c08..89d9b69dec7ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -261,10 +261,10 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } test("test for DROPMALFORMED parsing mode") { - Seq(false, true).foreach { wholeFile => + Seq(false, true).foreach { multiLine => val cars = spark.read .format("csv") - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .options(Map("header" -> "true", "mode" -> "dropmalformed")) .load(testFile(carsFile)) @@ -284,11 +284,11 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } test("test for FAILFAST parsing mode") { - Seq(false, true).foreach { wholeFile => + Seq(false, true).foreach { multiLine => val exception = intercept[SparkException] { spark.read .format("csv") - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .options(Map("header" -> "true", "mode" -> "failfast")) .load(testFile(carsFile)).collect() } @@ -990,13 +990,13 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } test("SPARK-18699 put malformed records in a `columnNameOfCorruptRecord` field") { - Seq(false, true).foreach { wholeFile => + Seq(false, true).foreach { multiLine => val schema = new StructType().add("a", IntegerType).add("b", TimestampType) // We use `PERMISSIVE` mode by default if invalid string is given. val df1 = spark .read .option("mode", "abcd") - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .schema(schema) .csv(testFile(valueMalformedFile)) checkAnswer(df1, @@ -1011,7 +1011,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .read .option("mode", "Permissive") .option("columnNameOfCorruptRecord", columnNameOfCorruptRecord) - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .schema(schemaWithCorrField1) .csv(testFile(valueMalformedFile)) checkAnswer(df2, @@ -1028,7 +1028,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .read .option("mode", "permissive") .option("columnNameOfCorruptRecord", columnNameOfCorruptRecord) - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .schema(schemaWithCorrField2) .csv(testFile(valueMalformedFile)) checkAnswer(df3, @@ -1041,7 +1041,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { .read .option("mode", "PERMISSIVE") .option("columnNameOfCorruptRecord", columnNameOfCorruptRecord) - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .schema(schema.add(columnNameOfCorruptRecord, IntegerType)) .csv(testFile(valueMalformedFile)) .collect @@ -1073,7 +1073,7 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { val df = spark.read .option("header", true) - .option("wholeFile", true) + .option("multiLine", true) .csv(path.getAbsolutePath) // Check if headers have new lines in the names. 
@@ -1096,10 +1096,10 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } test("Empty file produces empty dataframe with empty schema") { - Seq(false, true).foreach { wholeFile => + Seq(false, true).foreach { multiLine => val df = spark.read.format("csv") .option("header", true) - .option("wholeFile", wholeFile) + .option("multiLine", multiLine) .load(testFile(emptyFile)) assert(df.schema === spark.emptyDataFrame.schema) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 09e61dc4af60d..f8eb5c569f9ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1804,7 +1804,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { assert(new File(path).listFiles().exists(_.getName.endsWith(".gz"))) - val jsonDF = spark.read.option("wholeFile", true).json(path) + val jsonDF = spark.read.option("multiLine", true).json(path) val jsonDir = new File(dir, "json").getCanonicalPath jsonDF.coalesce(1).write .option("compression", "gZiP") @@ -1826,7 +1826,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { .write .text(path) - val jsonDF = spark.read.option("wholeFile", true).json(path) + val jsonDF = spark.read.option("multiLine", true).json(path) val jsonDir = new File(dir, "json").getCanonicalPath jsonDF.coalesce(1).write.json(jsonDir) @@ -1855,7 +1855,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { .write .text(path) - val jsonDF = spark.read.option("wholeFile", true).json(path) + val jsonDF = spark.read.option("multiLine", true).json(path) // no corrupt record column should be created assert(jsonDF.schema === StructType(Seq())) // only the first object should be read @@ -1876,7 +1876,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { .write .text(path) - val jsonDF = spark.read.option("wholeFile", true).option("mode", "PERMISSIVE").json(path) + val jsonDF = spark.read.option("multiLine", true).option("mode", "PERMISSIVE").json(path) assert(jsonDF.count() === corruptRecordCount) assert(jsonDF.schema === new StructType() .add("_corrupt_record", StringType) @@ -1907,7 +1907,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { .write .text(path) - val jsonDF = spark.read.option("wholeFile", true).option("mode", "DROPMALFORMED").json(path) + val jsonDF = spark.read.option("multiLine", true).option("mode", "DROPMALFORMED").json(path) checkAnswer(jsonDF, Seq(Row("test"))) } } @@ -1930,7 +1930,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { // `FAILFAST` mode should throw an exception for corrupt records. 
val exceptionOne = intercept[SparkException] { spark.read - .option("wholeFile", true) + .option("multiLine", true) .option("mode", "FAILFAST") .json(path) } @@ -1938,7 +1938,7 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { val exceptionTwo = intercept[SparkException] { spark.read - .option("wholeFile", true) + .option("multiLine", true) .option("mode", "FAILFAST") .schema(schema) .json(path) From b5504f6d3fc375eecb131460c8b01e0be18f4e9b Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Wed, 14 Jun 2017 23:08:05 -0700 Subject: [PATCH 1001/1204] [SPARK-20980][DOCS] update doc to reflect multiLine change ## What changes were proposed in this pull request? doc only change ## How was this patch tested? manually Author: Felix Cheung Closes #18312 from felixcheung/sqljsonwholefiledoc. (cherry picked from commit 1bf55e396c7b995a276df61d9a4eb8e60bcee334) Signed-off-by: Felix Cheung --- docs/sql-programming-guide.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 314ff6ef80d29..8e722ae6adca6 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -998,7 +998,7 @@ Note that the file that is offered as _a json file_ is not a typical JSON file. line must contain a separate, self-contained valid JSON object. For more information, please see [JSON Lines text format, also called newline-delimited JSON](http://jsonlines.org/). -For a regular multi-line JSON file, set the `wholeFile` option to `true`. +For a regular multi-line JSON file, set the `multiLine` option to `true`. {% include_example json_dataset scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala %}
    @@ -1012,7 +1012,7 @@ Note that the file that is offered as _a json file_ is not a typical JSON file. line must contain a separate, self-contained valid JSON object. For more information, please see [JSON Lines text format, also called newline-delimited JSON](http://jsonlines.org/). -For a regular multi-line JSON file, set the `wholeFile` option to `true`. +For a regular multi-line JSON file, set the `multiLine` option to `true`. {% include_example json_dataset java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java %}
    @@ -1025,7 +1025,7 @@ Note that the file that is offered as _a json file_ is not a typical JSON file. line must contain a separate, self-contained valid JSON object. For more information, please see [JSON Lines text format, also called newline-delimited JSON](http://jsonlines.org/). -For a regular multi-line JSON file, set the `wholeFile` parameter to `True`. +For a regular multi-line JSON file, set the `multiLine` parameter to `True`. {% include_example json_dataset python/sql/datasource.py %}
    @@ -1039,7 +1039,7 @@ Note that the file that is offered as _a json file_ is not a typical JSON file. line must contain a separate, self-contained valid JSON object. For more information, please see [JSON Lines text format, also called newline-delimited JSON](http://jsonlines.org/). -For a regular multi-line JSON file, set a named parameter `wholeFile` to `TRUE`. +For a regular multi-line JSON file, set a named parameter `multiLine` to `TRUE`. {% include_example json_dataset r/RSparkSQLExample.R %} From 76ee41fd7188cece3d59a77612cba37c9179af7a Mon Sep 17 00:00:00 2001 From: Xingbo Jiang Date: Fri, 16 Jun 2017 00:06:54 +0800 Subject: [PATCH 1002/1204] [SPARK-16251][SPARK-20200][CORE][TEST] Flaky test: org.apache.spark.rdd.LocalCheckpointSuite.missing checkpoint block fails with informative message ## What changes were proposed in this pull request? Currently we don't wait to confirm the removal of the block from the slave's BlockManager, if the removal takes too much time, we will fail the assertion in this test case. The failure can be easily reproduced if we sleep for a while before we remove the block in BlockManagerSlaveEndpoint.receiveAndReply(). ## How was this patch tested? N/A Author: Xingbo Jiang Closes #18314 from jiangxb1987/LocalCheckpointSuite. (cherry picked from commit 7dc3e697c74864a4e3cca7342762f1427058b3c3) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/rdd/LocalCheckpointSuite.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala index 2802cd975292c..9e204f5cc33fe 100644 --- a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala @@ -17,6 +17,10 @@ package org.apache.spark.rdd +import scala.concurrent.duration._ + +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} + import org.apache.spark.{LocalSparkContext, SparkContext, SparkException, SparkFunSuite} import org.apache.spark.storage.{RDDBlockId, StorageLevel} @@ -168,6 +172,10 @@ class LocalCheckpointSuite extends SparkFunSuite with LocalSparkContext { // Collecting the RDD should now fail with an informative exception val blockId = RDDBlockId(rdd.id, numPartitions - 1) bmm.removeBlock(blockId) + // Wait until the block has been removed successfully. + eventually(timeout(1 seconds), interval(100 milliseconds)) { + assert(bmm.getBlockStatus(blockId).isEmpty) + } try { rdd.collect() fail("Collect should have failed if local checkpoint block is removed...") From 62f2b804e78bab9270ca73cfcfdc3c574835dbfc Mon Sep 17 00:00:00 2001 From: Xingbo Jiang Date: Fri, 16 Jun 2017 00:06:54 +0800 Subject: [PATCH 1003/1204] [SPARK-16251][SPARK-20200][CORE][TEST] Flaky test: org.apache.spark.rdd.LocalCheckpointSuite.missing checkpoint block fails with informative message ## What changes were proposed in this pull request? Currently we don't wait to confirm the removal of the block from the slave's BlockManager, if the removal takes too much time, we will fail the assertion in this test case. The failure can be easily reproduced if we sleep for a while before we remove the block in BlockManagerSlaveEndpoint.receiveAndReply(). ## How was this patch tested? N/A Author: Xingbo Jiang Closes #18314 from jiangxb1987/LocalCheckpointSuite. 
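For readability, here is a condensed view of the waiting pattern the diff below introduces; `bmm` and `blockId` are the block manager master and block id already defined in the existing test:

```scala
import scala.concurrent.duration._
import org.scalatest.concurrent.Eventually.{eventually, interval, timeout}

// removeBlock() only triggers an asynchronous removal on the remote BlockManager, so poll
// the master until the block status is really gone before collecting the RDD.
bmm.removeBlock(blockId)
eventually(timeout(1 seconds), interval(100 milliseconds)) {
  assert(bmm.getBlockStatus(blockId).isEmpty)
}
```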
(cherry picked from commit 7dc3e697c74864a4e3cca7342762f1427058b3c3) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/rdd/LocalCheckpointSuite.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala index 2802cd975292c..9e204f5cc33fe 100644 --- a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala @@ -17,6 +17,10 @@ package org.apache.spark.rdd +import scala.concurrent.duration._ + +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} + import org.apache.spark.{LocalSparkContext, SparkContext, SparkException, SparkFunSuite} import org.apache.spark.storage.{RDDBlockId, StorageLevel} @@ -168,6 +172,10 @@ class LocalCheckpointSuite extends SparkFunSuite with LocalSparkContext { // Collecting the RDD should now fail with an informative exception val blockId = RDDBlockId(rdd.id, numPartitions - 1) bmm.removeBlock(blockId) + // Wait until the block has been removed successfully. + eventually(timeout(1 seconds), interval(100 milliseconds)) { + assert(bmm.getBlockStatus(blockId).isEmpty) + } try { rdd.collect() fail("Collect should have failed if local checkpoint block is removed...") From a585c870a066fa94d97462cefbaa4057a7a0ed44 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 15 Jun 2017 18:25:39 -0700 Subject: [PATCH 1004/1204] [SPARK-21111][TEST][2.2] Fix the test failure of describe.sql ## What changes were proposed in this pull request? Test failed in `describe.sql`. We need to fix the related bug introduced in (https://github.com/apache/spark/pull/17649) in the follow-up PR to master. ## How was this patch tested? N/A Author: gatorsmile Closes #18316 from gatorsmile/fix. --- sql/core/src/test/resources/sql-tests/results/describe.sql.out | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 329532cd7c842..ab9f2783f06bb 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -127,6 +127,7 @@ Provider parquet Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] +Comment table_comment Table Properties [e=3] Location [not included in comparison]sql/core/spark-warehouse/t Storage Properties [a=1, b=2] @@ -157,6 +158,7 @@ Provider parquet Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] +Comment table_comment Table Properties [e=3] Location [not included in comparison]sql/core/spark-warehouse/t Storage Properties [a=1, b=2] From 9909be3681fcdfd3532c4b4997b3433d23018efb Mon Sep 17 00:00:00 2001 From: Xianyang Liu Date: Fri, 16 Jun 2017 12:10:09 +0800 Subject: [PATCH 1005/1204] [SPARK-21072][SQL] TreeNode.mapChildren should only apply to the children node. ## What changes were proposed in this pull request? Just as the function name and comments of `TreeNode.mapChildren` mentioned, the function should be apply to all currently node children. So, the follow code should judge whether it is the children node. https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala#L342 ## How was this patch tested? Existing tests. Author: Xianyang Liu Closes #18284 from ConeyLiu/treenode. 
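To make the rule concrete, a small self-contained analogue in plain Scala (not the Catalyst classes themselves, and the `Node` type here is invented for illustration): a transform passed to `mapChildren` must only rewrite values that are registered children of the node, even when those values sit inside tuple fields next to unrelated nodes.

```scala
// A minimal analogue of the fixed behavior: only actual children are passed to f.
case class Node(name: String, children: Seq[Node] = Nil, metadata: Seq[(Node, Node)] = Nil) {
  lazy val containsChild: Set[Node] = children.toSet

  def mapChildren(f: Node => Node): Node =
    copy(
      children = children.map(f),
      metadata = metadata.map { case (a, b) =>
        (if (containsChild(a)) f(a) else a, if (containsChild(b)) f(b) else b)
      })
}

val child = Node("child")
val other = Node("other")
val root = Node("root", children = Seq(child), metadata = Seq((child, other)))
val renamed = root.mapChildren(n => n.copy(name = n.name.toUpperCase))
// Only the registered child is rewritten; the unrelated node keeps its original name.
assert(renamed.metadata == Seq((Node("CHILD"), Node("other"))))
```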
(cherry picked from commit 87ab0cec65b50584a627037b9d1b6fdecaee725c) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/trees/TreeNode.scala | 14 +++++++++++-- .../sql/catalyst/trees/TreeNodeSuite.scala | 21 ++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 2109c1c23b706..ae5c513eb040b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -340,8 +340,18 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { arg } case tuple@(arg1: TreeNode[_], arg2: TreeNode[_]) => - val newChild1 = f(arg1.asInstanceOf[BaseType]) - val newChild2 = f(arg2.asInstanceOf[BaseType]) + val newChild1 = if (containsChild(arg1)) { + f(arg1.asInstanceOf[BaseType]) + } else { + arg1.asInstanceOf[BaseType] + } + + val newChild2 = if (containsChild(arg2)) { + f(arg2.asInstanceOf[BaseType]) + } else { + arg2.asInstanceOf[BaseType] + } + if (!(newChild1 fastEquals arg1) || !(newChild2 fastEquals arg2)) { changed = true (newChild1, newChild2) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 37e3dfabd0b21..06ef7bcee0d84 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -54,13 +54,21 @@ case class ComplexPlan(exprs: Seq[Seq[Expression]]) override def output: Seq[Attribute] = Nil } -case class ExpressionInMap(map: Map[String, Expression]) extends Expression with Unevaluable { +case class ExpressionInMap(map: Map[String, Expression]) extends Unevaluable { override def children: Seq[Expression] = map.values.toSeq override def nullable: Boolean = true override def dataType: NullType = NullType override lazy val resolved = true } +case class SeqTupleExpression(sons: Seq[(Expression, Expression)], + nonSons: Seq[(Expression, Expression)]) extends Unevaluable { + override def children: Seq[Expression] = sons.flatMap(t => Iterator(t._1, t._2)) + override def nullable: Boolean = true + override def dataType: NullType = NullType + override lazy val resolved = true +} + case class JsonTestTreeNode(arg: Any) extends LeafNode { override def output: Seq[Attribute] = Seq.empty[Attribute] } @@ -146,6 +154,17 @@ class TreeNodeSuite extends SparkFunSuite { assert(actual === Dummy(None)) } + test("mapChildren should only works on children") { + val children = Seq((Literal(1), Literal(2))) + val nonChildren = Seq((Literal(3), Literal(4))) + val before = SeqTupleExpression(children, nonChildren) + val toZero: PartialFunction[Expression, Expression] = { case Literal(_, _) => Literal(0) } + val expect = SeqTupleExpression(Seq((Literal(0), Literal(0))), nonChildren) + + val actual = before mapChildren toZero + assert(actual === expect) + } + test("preserves origin") { CurrentOrigin.setPosition(1, 1) val add = Add(Literal(1), Literal(1)) From 915a2010448cf012527ccefe4430c6b0deb8f49c Mon Sep 17 00:00:00 2001 From: Xianyang Liu Date: Fri, 16 Jun 2017 12:10:09 +0800 Subject: [PATCH 1006/1204] [SPARK-21072][SQL] TreeNode.mapChildren should only apply to the children node. ## What changes were proposed in this pull request? 
Just as the function name and comments of `TreeNode.mapChildren` mentioned, the function should be apply to all currently node children. So, the follow code should judge whether it is the children node. https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala#L342 ## How was this patch tested? Existing tests. Author: Xianyang Liu Closes #18284 from ConeyLiu/treenode. (cherry picked from commit 87ab0cec65b50584a627037b9d1b6fdecaee725c) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/trees/TreeNode.scala | 14 +++++++++++-- .../sql/catalyst/trees/TreeNodeSuite.scala | 21 ++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 26d13ba3bca0b..238eeccefde40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -341,8 +341,18 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { arg } case tuple@(arg1: TreeNode[_], arg2: TreeNode[_]) => - val newChild1 = f(arg1.asInstanceOf[BaseType]) - val newChild2 = f(arg2.asInstanceOf[BaseType]) + val newChild1 = if (containsChild(arg1)) { + f(arg1.asInstanceOf[BaseType]) + } else { + arg1.asInstanceOf[BaseType] + } + + val newChild2 = if (containsChild(arg2)) { + f(arg2.asInstanceOf[BaseType]) + } else { + arg2.asInstanceOf[BaseType] + } + if (!(newChild1 fastEquals arg1) || !(newChild2 fastEquals arg2)) { changed = true (newChild1, newChild2) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 37e3dfabd0b21..06ef7bcee0d84 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -54,13 +54,21 @@ case class ComplexPlan(exprs: Seq[Seq[Expression]]) override def output: Seq[Attribute] = Nil } -case class ExpressionInMap(map: Map[String, Expression]) extends Expression with Unevaluable { +case class ExpressionInMap(map: Map[String, Expression]) extends Unevaluable { override def children: Seq[Expression] = map.values.toSeq override def nullable: Boolean = true override def dataType: NullType = NullType override lazy val resolved = true } +case class SeqTupleExpression(sons: Seq[(Expression, Expression)], + nonSons: Seq[(Expression, Expression)]) extends Unevaluable { + override def children: Seq[Expression] = sons.flatMap(t => Iterator(t._1, t._2)) + override def nullable: Boolean = true + override def dataType: NullType = NullType + override lazy val resolved = true +} + case class JsonTestTreeNode(arg: Any) extends LeafNode { override def output: Seq[Attribute] = Seq.empty[Attribute] } @@ -146,6 +154,17 @@ class TreeNodeSuite extends SparkFunSuite { assert(actual === Dummy(None)) } + test("mapChildren should only works on children") { + val children = Seq((Literal(1), Literal(2))) + val nonChildren = Seq((Literal(3), Literal(4))) + val before = SeqTupleExpression(children, nonChildren) + val toZero: PartialFunction[Expression, Expression] = { case Literal(_, _) => Literal(0) } + val expect = SeqTupleExpression(Seq((Literal(0), Literal(0))), nonChildren) + + val actual = before mapChildren 
toZero + assert(actual === expect) + } + test("preserves origin") { CurrentOrigin.setPosition(1, 1) val add = Add(Literal(1), Literal(1)) From 0ebb3b843aaaaa7eb97aac953646a6955fe10a1c Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 16 Jun 2017 12:19:55 +0800 Subject: [PATCH 1007/1204] [SPARK-21114][TEST][2.1] Fix test failure in Spark 2.1/2.0 due to name mismatch ## What changes were proposed in this pull request? Name mismatch between 2.1/2.0 and 2.2. Thus, the test cases failed after we backport a fix to 2.1/2.0. This PR is to fix the issue. https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test/job/spark-branch-2.1-test-maven-hadoop-2.7/lastCompletedBuild/testReport/org.apache.spark.sql/SQLQueryTestSuite/arithmetic_sql/ https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test/job/spark-branch-2.0-test-maven-hadoop-2.2/lastCompletedBuild/testReport/org.apache.spark.sql/SQLQueryTestSuite/arithmetic_sql/ ## How was this patch tested? N/A Author: gatorsmile Closes #18319 from gatorsmile/fixDecimal. --- .../src/test/resources/sql-tests/results/arithmetic.sql.out | 6 +++--- .../src/test/resources/sql-tests/results/inner-join.sql.out | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index 3811cd2c30986..5418d47ba0c08 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -281,7 +281,7 @@ struct -- !query 34 select ceil(1234567890123456) -- !query 34 schema -struct +struct -- !query 34 output 1234567890123456 @@ -289,7 +289,7 @@ struct -- !query 35 select ceiling(1234567890123456) -- !query 35 schema -struct +struct -- !query 35 output 1234567890123456 @@ -329,7 +329,7 @@ struct -- !query 40 select floor(1234567890123456) -- !query 40 schema -struct +struct -- !query 40 output 1234567890123456 diff --git a/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out b/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out index aa20537d449e3..8d56ebe9fd3b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 13 +-- Number of queries: 7 -- !query 0 @@ -65,4 +65,3 @@ struct 1 a 1 b 1 b - From 653e6f12821164a30f40404b197f472ba7ae8b89 Mon Sep 17 00:00:00 2001 From: jerryshao Date: Fri, 16 Jun 2017 14:24:15 +0800 Subject: [PATCH 1008/1204] [SPARK-12552][FOLLOWUP] Fix flaky test for "o.a.s.deploy.master.MasterSuite.master correctly recover the application" ## What changes were proposed in this pull request? Due to the RPC asynchronous event processing, The test "correctly recover the application" could potentially be failed. The issue could be found in here: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/78126/testReport/org.apache.spark.deploy.master/MasterSuite/master_correctly_recover_the_application/. So here fixing this flaky test. ## How was this patch tested? Existing UT. CC cloud-fan jiangxb1987 , please help to review, thanks! Author: jerryshao Closes #18321 from jerryshao/SPARK-12552-followup. 
(cherry picked from commit 2837b14cdc42f096dce07e383caa30c7469c5d6b) Signed-off-by: Wenchen Fan --- .../test/scala/org/apache/spark/deploy/master/MasterSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 4f432e4cf21c7..7f88aa6d2ece3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -169,7 +169,7 @@ class MasterSuite extends SparkFunSuite master.rpcEnv.setupEndpoint(Master.ENDPOINT_NAME, master) // Wait until Master recover from checkpoint data. eventually(timeout(5 seconds), interval(100 milliseconds)) { - master.idToApp.size should be(1) + master.workers.size should be(1) } master.idToApp.keySet should be(Set(fakeAppInfo.id)) From d3deeb35bde3adc6b26281f03adb173dcc4c4022 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 16 Jun 2017 11:03:54 +0100 Subject: [PATCH 1009/1204] [MINOR][DOCS] Improve Running R Tests docs ## What changes were proposed in this pull request? Update Running R Tests dependence packages to: ```bash R -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival'), repos='http://cran.us.r-project.org')" ``` ## How was this patch tested? manual tests Author: Yuming Wang Closes #18271 from wangyum/building-spark. (cherry picked from commit 45824fb608930eb461e7df53bb678c9534c183a9) Signed-off-by: Sean Owen --- R/README.md | 6 +----- R/WINDOWS.md | 3 +-- docs/building-spark.md | 8 +++++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/R/README.md b/R/README.md index 4c40c5963db70..1152b1e8e5f9f 100644 --- a/R/README.md +++ b/R/README.md @@ -66,11 +66,7 @@ To run one of them, use `./bin/spark-submit `. For example: ```bash ./bin/spark-submit examples/src/main/r/dataframe.R ``` -You can also run the unit tests for SparkR by running. You need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first: -```bash -R -e 'install.packages("testthat", repos="http://cran.us.r-project.org")' -./R/run-tests.sh -``` +You can run R unit tests by following the instructions under [Running R Tests](http://spark.apache.org/docs/latest/building-spark.html#running-r-tests). ### Running on YARN diff --git a/R/WINDOWS.md b/R/WINDOWS.md index 9ca7e58e20cd2..124bc631be9cd 100644 --- a/R/WINDOWS.md +++ b/R/WINDOWS.md @@ -34,10 +34,9 @@ To run the SparkR unit tests on Windows, the following steps are required —ass 4. Set the environment variable `HADOOP_HOME` to the full path to the newly created `hadoop` directory. -5. Run unit tests for SparkR by running the command below. You need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first: +5. Run unit tests for SparkR by running the command below. 
You need to install the needed packages following the instructions under [Running R Tests](http://spark.apache.org/docs/latest/building-spark.html#running-r-tests) first: ``` - R -e "install.packages('testthat', repos='http://cran.us.r-project.org')" .\bin\spark-submit2.cmd --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R ``` diff --git a/docs/building-spark.md b/docs/building-spark.md index 0f551bc66b8c9..777635a64f83c 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -218,9 +218,11 @@ The run-tests script also can be limited to a specific Python version or a speci ## Running R Tests -To run the SparkR tests you will need to install the R package `testthat` -(run `install.packages(testthat)` from R shell). You can run just the SparkR tests using -the command: +To run the SparkR tests you will need to install the [knitr](https://cran.r-project.org/package=knitr), [rmarkdown](https://cran.r-project.org/package=rmarkdown), [testthat](https://cran.r-project.org/package=testthat), [e1071](https://cran.r-project.org/package=e1071) and [survival](https://cran.r-project.org/package=survival) packages first: + + R -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival'), repos='http://cran.us.r-project.org')" + +You can run just the SparkR tests using the command: ./R/run-tests.sh From 8747f8ef0567ed724367ce74cc312c69e839e0b2 Mon Sep 17 00:00:00 2001 From: liuzhaokun Date: Sun, 18 Jun 2017 08:32:29 +0100 Subject: [PATCH 1010/1204] [SPARK-21126] The configuration which named "spark.core.connection.auth.wait.timeout" hasn't been used in spark [https://issues.apache.org/jira/browse/SPARK-21126](https://issues.apache.org/jira/browse/SPARK-21126) The configuration which named "spark.core.connection.auth.wait.timeout" hasn't been used in spark,so I think it should be removed from configuration.md. Author: liuzhaokun Closes #18333 from liu-zhaokun/new3. (cherry picked from commit 0d8604bb849b3370cc21966cdd773238f3a29f84) Signed-off-by: Sean Owen --- docs/configuration.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 0771e36f80b50..8d5d55ca0429b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1765,14 +1765,6 @@ Apart from these, the following properties are also available, and may be useful you can set larger value. - - spark.core.connection.auth.wait.timeout - 30s - - How long for the connection to wait for authentication to occur before timing - out and giving up. - - spark.modify.acls Empty From c0d4acce50f45d9063b736a61f063ae9a0127c3f Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 18 Jun 2017 08:43:47 +0100 Subject: [PATCH 1011/1204] [MINOR][R] Add knitr and rmarkdown packages/improve output for version info in AppVeyor tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? This PR proposes three things as below: **Install packages per documentation** - this does not affect the tests itself (but CRAN which we are not doing via AppVeyor) up to my knowledge. 
This adds `knitr` and `rmarkdown` per https://github.com/apache/spark/blob/45824fb608930eb461e7df53bb678c9534c183a9/R/WINDOWS.md#unit-tests (please see https://github.com/apache/spark/commit/45824fb608930eb461e7df53bb678c9534c183a9) **Improve logs/shorten logs** - actually, long logs can be a problem on AppVeyor (e.g., see https://github.com/apache/spark/pull/17873) `R -e ...` repeats printing R information for each invocation as below: ``` R version 3.3.1 (2016-06-21) -- "Bug in Your Hair" Copyright (C) 2016 The R Foundation for Statistical Computing Platform: i386-w64-mingw32/i386 (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. ``` It looks reducing the call might be slightly better and print out the versions together looks more readable. Before: ``` # R information ... > packageVersion('testthat') [1] '1.0.2' > > # R information ... > packageVersion('e1071') [1] '1.6.8' > > ... 3 more times ``` After: ``` # R information ... > packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival') [1] ‘1.16’ [1] ‘1.6’ [1] ‘1.0.2’ [1] ‘1.6.8’ [1] ‘2.41.3’ ``` **Add`appveyor.yml`/`dev/appveyor-install-dependencies.ps1` for triggering the test** Changing this file might break the test, e.g., https://github.com/apache/spark/pull/16927 ## How was this patch tested? Before (please see https://ci.appveyor.com/project/HyukjinKwon/spark/build/169-master) After (please see the AppVeyor build in this PR): Author: hyukjinkwon Closes #18336 from HyukjinKwon/minor-add-knitr-and-rmarkdown. 
(cherry picked from commit 75a6d05853fea13f88e3c941b1959b24e4640824) Signed-off-by: Sean Owen --- appveyor.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f4d13b8515cd0..49e09eadee5da 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,6 +26,8 @@ branches: only_commits: files: + - appveyor.yml + - dev/appveyor-install-dependencies.ps1 - R/ - sql/core/src/main/scala/org/apache/spark/sql/api/r/ - core/src/main/scala/org/apache/spark/api/r/ @@ -38,12 +40,8 @@ install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 # Required package for R unit tests - - cmd: R -e "install.packages('testthat', repos='http://cran.us.r-project.org')" - - cmd: R -e "packageVersion('testthat')" - - cmd: R -e "install.packages('e1071', repos='http://cran.us.r-project.org')" - - cmd: R -e "packageVersion('e1071')" - - cmd: R -e "install.packages('survival', repos='http://cran.us.r-project.org')" - - cmd: R -e "packageVersion('survival')" + - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival'), repos='http://cran.us.r-project.org')" + - cmd: R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival')" build_script: - cmd: mvn -DskipTests -Psparkr -Phive -Phive-thriftserver package From d3c79b77ab6df5f20d8fc07db089051a064ecb16 Mon Sep 17 00:00:00 2001 From: liuxian Date: Mon, 19 Jun 2017 11:46:58 +0800 Subject: [PATCH 1012/1204] [SPARK-21090][CORE] Optimize the unified memory manager code ## What changes were proposed in this pull request? 1.In `acquireStorageMemory`, when the Memory Mode is OFF_HEAP ,the `maxOffHeapMemory` should be modified to `maxOffHeapStorageMemory`. after this PR,it will same as ON_HEAP Memory Mode. Because when acquire memory is between `maxOffHeapStorageMemory` and `maxOffHeapMemory`,it will fail surely, so if acquire memory is greater than `maxOffHeapStorageMemory`(not greater than `maxOffHeapMemory`),we should fail fast. 2. Borrow memory from execution, `numBytes` modified to `numBytes - storagePool.memoryFree` will be more reasonable. Because we just acquire `(numBytes - storagePool.memoryFree)`, unnecessary borrowed `numBytes` from execution ## How was this patch tested? added unit test case Author: liuxian Closes #18296 from 10110346/wip-lx-0614. 
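A minimal standalone sketch of the corrected borrowing logic (the `Pools` type and `acquireStorage` helper are hypothetical stand-ins, not Spark's real `UnifiedMemoryManager`, and eviction is omitted; the actual change is in the diff below):

```
// Simplified illustration: borrow from execution only the storage shortfall,
// i.e. min(executionFree, numBytes - storageFree), instead of min(executionFree, numBytes).
case class Pools(var storageFree: Long, var executionFree: Long)   // hypothetical stand-in

def acquireStorage(pools: Pools, numBytes: Long, maxStorageBytes: Long): Boolean = {
  if (numBytes > maxStorageBytes) return false        // fail fast: the block can never fit
  if (numBytes > pools.storageFree) {
    // Borrow only what the storage pool is actually missing.
    val borrowed = math.min(pools.executionFree, numBytes - pools.storageFree)
    pools.executionFree -= borrowed
    pools.storageFree += borrowed
  }
  val granted = numBytes <= pools.storageFree          // eviction is omitted in this sketch
  if (granted) pools.storageFree -= numBytes
  granted
}

// Storage has 300 free, execution has 600 free, and 450 bytes are requested:
// only the missing 150 is borrowed, leaving 450 free in the execution pool.
val pools = Pools(storageFree = 300L, executionFree = 600L)
assert(acquireStorage(pools, 450L, maxStorageBytes = 1000L))
assert(pools.executionFree == 450L)
```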
(cherry picked from commit 112bd9bfc5b9729f6f86518998b5d80c5e79fe5e) Signed-off-by: Wenchen Fan --- .../spark/memory/UnifiedMemoryManager.scala | 5 +-- .../spark/memory/MemoryManagerSuite.scala | 2 +- .../memory/UnifiedMemoryManagerSuite.scala | 32 +++++++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala index fea2808218a53..df193552bed3c 100644 --- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala @@ -160,7 +160,7 @@ private[spark] class UnifiedMemoryManager private[memory] ( case MemoryMode.OFF_HEAP => ( offHeapExecutionMemoryPool, offHeapStorageMemoryPool, - maxOffHeapMemory) + maxOffHeapStorageMemory) } if (numBytes > maxMemory) { // Fail fast if the block simply won't fit @@ -171,7 +171,8 @@ private[spark] class UnifiedMemoryManager private[memory] ( if (numBytes > storagePool.memoryFree) { // There is not enough free memory in the storage pool, so try to borrow free memory from // the execution pool. - val memoryBorrowedFromExecution = Math.min(executionPool.memoryFree, numBytes) + val memoryBorrowedFromExecution = Math.min(executionPool.memoryFree, + numBytes - storagePool.memoryFree) executionPool.decrementPoolSize(memoryBorrowedFromExecution) storagePool.incrementPoolSize(memoryBorrowedFromExecution) } diff --git a/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala index eb2b3ffd1509a..85eeb5055ae03 100644 --- a/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/MemoryManagerSuite.scala @@ -117,7 +117,7 @@ private[memory] trait MemoryManagerSuite extends SparkFunSuite with BeforeAndAft evictBlocksToFreeSpaceCalled.set(numBytesToFree) if (numBytesToFree <= mm.storageMemoryUsed) { // We can evict enough blocks to fulfill the request for space - mm.releaseStorageMemory(numBytesToFree, MemoryMode.ON_HEAP) + mm.releaseStorageMemory(numBytesToFree, mm.tungstenMemoryMode) evictedBlocks += Tuple2(null, BlockStatus(StorageLevel.MEMORY_ONLY, numBytesToFree, 0L)) numBytesToFree } else { diff --git a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala index c821054412d7d..02b04cdbb2a5f 100644 --- a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala @@ -303,4 +303,36 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes mm.invokePrivate[Unit](assertInvariants()) } + test("not enough free memory in the storage pool --OFF_HEAP") { + val conf = new SparkConf() + .set("spark.memory.offHeap.size", "1000") + .set("spark.testing.memory", "1000") + .set("spark.memory.offHeap.enabled", "true") + val taskAttemptId = 0L + val mm = UnifiedMemoryManager(conf, numCores = 1) + val ms = makeMemoryStore(mm) + val memoryMode = MemoryMode.OFF_HEAP + + assert(mm.acquireExecutionMemory(400L, taskAttemptId, memoryMode) === 400L) + assert(mm.storageMemoryUsed === 0L) + assert(mm.executionMemoryUsed === 400L) + + // Fail fast + assert(!mm.acquireStorageMemory(dummyBlock, 700L, memoryMode)) + assert(mm.storageMemoryUsed === 0L) + + 
assert(mm.acquireStorageMemory(dummyBlock, 100L, memoryMode)) + assert(mm.storageMemoryUsed === 100L) + assertEvictBlocksToFreeSpaceNotCalled(ms) + + // Borrow 50 from execution memory + assert(mm.acquireStorageMemory(dummyBlock, 450L, memoryMode)) + assertEvictBlocksToFreeSpaceNotCalled(ms) + assert(mm.storageMemoryUsed === 550L) + + // Borrow 50 from execution memory and evict 50 to free space + assert(mm.acquireStorageMemory(dummyBlock, 100L, memoryMode)) + assertEvictBlocksToFreeSpaceCalled(ms, 50) + assert(mm.storageMemoryUsed === 600L) + } } From fab070ca4048fbf51ae511dce80a51322f657c27 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Mon, 19 Jun 2017 15:51:21 +0800 Subject: [PATCH 1013/1204] [SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored ### What changes were proposed in this pull request? We should not silently ignore `DISTINCT` when they are not supported in the function arguments. This PR is to block these cases and issue the error messages. ### How was this patch tested? Added test cases for both regular functions and window functions Author: Xiao Li Closes #18340 from gatorsmile/firstCount. (cherry picked from commit 9413b84b5a99e264816c61f72905b392c2f9cd35) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++++-- .../catalyst/analysis/AnalysisErrorSuite.scala | 15 +++++++++++++-- .../sql/catalyst/analysis/AnalysisTest.scala | 8 ++++++-- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 997964238ca98..3e416b3bb2725 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1189,11 +1189,21 @@ class Analyzer( // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within // the context of a Window clause. They do not need to be wrapped in an // AggregateExpression. - case wf: AggregateWindowFunction => wf + case wf: AggregateWindowFunction => + if (isDistinct) { + failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT") + } else { + wf + } // We get an aggregate function, we need to wrap it in an AggregateExpression. case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct) // This function is not an aggregate function, just return the resolved one. 
- case other => other + case other => + if (isDistinct) { + failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT") + } else { + other + } } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index d2ebca5a83dd3..5050318d96358 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max} -import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData} import org.apache.spark.sql.types._ @@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest { "not supported within a window function" :: Nil) errorTest( - "distinct window function", + "distinct aggregate function in window", testRelation2.select( WindowExpression( AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true), @@ -162,6 +163,16 @@ class AnalysisErrorSuite extends AnalysisTest { UnspecifiedFrame)).as('window)), "Distinct window functions are not supported" :: Nil) + errorTest( + "distinct function", + CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"), + "hex does not support the modifier DISTINCT" :: Nil) + + errorTest( + "distinct window function", + CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"), + "percent_rank does not support the modifier DISTINCT" :: Nil) + errorTest( "nested aggregate functions", testRelation.groupBy('a)( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 82015b1e0671c..08d9313894c2d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.catalyst.analysis +import java.net.URI import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf @@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest { private def makeAnalyzer(caseSensitive: Boolean): Analyzer = { val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) - val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) + val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) + catalog.createDatabase( + CatalogDatabase("default", "", new URI("loc"), Map.empty), + ignoreIfExists = false) 
catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true) catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true) new Analyzer(catalog, conf) { From f7fcdec6c2ff04b20c3110b1457a8df769fb4dba Mon Sep 17 00:00:00 2001 From: saturday_s Date: Mon, 19 Jun 2017 10:24:29 -0700 Subject: [PATCH 1014/1204] [SPARK-19688][STREAMING] Not to read `spark.yarn.credentials.file` from checkpoint. ## What changes were proposed in this pull request? Reload the `spark.yarn.credentials.file` property when restarting a streaming application from checkpoint. ## How was this patch tested? Manual tested with 1.6.3 and 2.1.1. I didn't test this with master because of some compile problems, but I think it will be the same result. ## Notice This should be merged into maintenance branches too. jira: [SPARK-21008](https://issues.apache.org/jira/browse/SPARK-21008) Author: saturday_s Closes #18230 from saturday-shi/SPARK-21008. (cherry picked from commit e92ffe6f1771e3fe9ea2e62ba552c1b5cf255368) Signed-off-by: Marcelo Vanzin --- .../src/main/scala/org/apache/spark/streaming/Checkpoint.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 5cbad8bf3ce6e..b8c780db07c98 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -55,6 +55,9 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) "spark.master", "spark.yarn.keytab", "spark.yarn.principal", + "spark.yarn.credentials.file", + "spark.yarn.credentials.renewalTime", + "spark.yarn.credentials.updateTime", "spark.ui.filters") val newSparkConf = new SparkConf(loadDefaults = false).setAll(sparkConfPairs) From a44c118e28754fd021f084ddd1819fec6bfbc625 Mon Sep 17 00:00:00 2001 From: saturday_s Date: Mon, 19 Jun 2017 10:24:29 -0700 Subject: [PATCH 1015/1204] [SPARK-19688][STREAMING] Not to read `spark.yarn.credentials.file` from checkpoint. ## What changes were proposed in this pull request? Reload the `spark.yarn.credentials.file` property when restarting a streaming application from checkpoint. ## How was this patch tested? Manual tested with 1.6.3 and 2.1.1. I didn't test this with master because of some compile problems, but I think it will be the same result. ## Notice This should be merged into maintenance branches too. jira: [SPARK-21008](https://issues.apache.org/jira/browse/SPARK-21008) Author: saturday_s Closes #18230 from saturday-shi/SPARK-21008. 
(cherry picked from commit e92ffe6f1771e3fe9ea2e62ba552c1b5cf255368) Signed-off-by: Marcelo Vanzin --- .../src/main/scala/org/apache/spark/streaming/Checkpoint.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 5cbad8bf3ce6e..b8c780db07c98 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -55,6 +55,9 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) "spark.master", "spark.yarn.keytab", "spark.yarn.principal", + "spark.yarn.credentials.file", + "spark.yarn.credentials.renewalTime", + "spark.yarn.credentials.updateTime", "spark.ui.filters") val newSparkConf = new SparkConf(loadDefaults = false).setAll(sparkConfPairs) From 7b50736c45f379841466ae5730323b153313f400 Mon Sep 17 00:00:00 2001 From: assafmendelson Date: Mon, 19 Jun 2017 10:58:58 -0700 Subject: [PATCH 1016/1204] [SPARK-21123][DOCS][STRUCTURED STREAMING] Options for file stream source are in a wrong table ## What changes were proposed in this pull request? The description for several options of File Source for structured streaming appeared in the File Sink description instead. This pull request has two commits: The first includes changes to the version as it appeared in spark 2.1 and the second handled an additional option added for spark 2.2 ## How was this patch tested? Built the documentation by SKIP_API=1 jekyll build and visually inspected the structured streaming programming guide. The original documentation was written by tdas and lw-lin Author: assafmendelson Closes #18342 from assafmendelson/spark-21123. (cherry picked from commit 66a792cd88c63cc0a1d20cbe14ac5699afbb3662) Signed-off-by: Shixiong Zhu --- .../structured-streaming-programming-guide.md | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 9b9177d44145f..d478042dea5c8 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -510,7 +510,20 @@ Here are the details of all the sources in Spark. File source path: path to the input directory, and common to all file formats. -

    + maxFilesPerTrigger: maximum number of new files to be considered in every trigger (default: no max)
    + latestFirst: whether to processs the latest new files first, useful when there is a large backlog of files (default: false)
    + fileNameOnly: whether to check new files based on only the filename instead of on the full path (default: false). With this set to `true`, the following files would be considered as the same file, because their filenames, "dataset.txt", are the same:
    +   · "file:///dataset.txt"
    +   · "s3://a/dataset.txt"
    +   · "s3n://a/b/dataset.txt"
    +   · "s3a://a/b/c/dataset.txt"
    For file-format-specific options, see the related methods in DataStreamReader (Scala/Java/Python/R).
@@ -1234,18 +1247,7 @@ Here are the details of all the sinks in Spark.
    Append
    path: path to the output directory, must be specified.
    - maxFilesPerTrigger: maximum number of new files to be considered in every trigger (default: no max)
    - latestFirst: whether to processs the latest new files first, useful when there is a large backlog of files (default: false)
    - fileNameOnly: whether to check new files based on only the filename instead of on the full path (default: false). With this set to `true`, the following files would be considered as the same file, because their filenames, "dataset.txt", are the same:
    -   · "file:///dataset.txt"
    -   · "s3://a/dataset.txt"
    -   · "s3n://a/b/dataset.txt"
    -   · "s3a://a/b/c/dataset.txt"

    For file-format-specific options, see the related methods in DataFrameWriter (Scala/Java/Python/R). From e329beaffe3d7691d63799cd1de2b30b990543c7 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 19 Jun 2017 20:17:54 +0100 Subject: [PATCH 1017/1204] [MINOR][BUILD] Fix Java linter errors This PR cleans up a few Java linter errors for Apache Spark 2.2 release. ```bash $ dev/lint-java Using `mvn` from path: /usr/local/bin/mvn Checkstyle checks passed. ``` We can check the result at Travis CI, [here](https://travis-ci.org/dongjoon-hyun/spark/builds/244297894). Author: Dongjoon Hyun Closes #18345 from dongjoon-hyun/fix_lint_java_2. (cherry picked from commit ecc5631351e81bbee4befb213f3053a4f31532a7) Signed-off-by: Sean Owen --- .../apache/spark/network/shuffle/OneForOneBlockFetcher.java | 2 +- .../org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java | 5 +++-- .../java/org/apache/spark/examples/ml/JavaALSExample.java | 2 +- .../apache/spark/examples/sql/JavaSQLDataSourceExample.java | 6 +++++- .../java/org/apache/spark/sql/streaming/OutputMode.java | 1 - 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java index 5f428759252aa..d46ce2e0e6b78 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java @@ -157,7 +157,7 @@ private class DownloadCallback implements StreamCallback { private File targetFile = null; private int chunkIndex; - public DownloadCallback(File targetFile, int chunkIndex) throws IOException { + DownloadCallback(File targetFile, int chunkIndex) throws IOException { this.targetFile = targetFile; this.channel = Channels.newChannel(new FileOutputStream(targetFile)); this.chunkIndex = chunkIndex; diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 2fde5c300f072..6c19c27c8eefb 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -353,8 +353,9 @@ private long[] mergeSpillsWithFileStream( } for (int partition = 0; partition < numPartitions; partition++) { final long initialFileLength = mergedFileOutputStream.getByteCount(); - // Shield the underlying output stream from close() calls, so that we can close the higher - // level streams to make sure all data is really flushed and internal state is cleaned. + // Shield the underlying output stream from close() calls, so that we can close + // the higher level streams to make sure all data is really flushed and internal state is + // cleaned. 
OutputStream partitionOutput = new CloseShieldOutputStream( new TimeTrackingOutputStream(writeMetrics, mergedFileOutputStream)); partitionOutput = blockManager.serializerManager().wrapForEncryption(partitionOutput); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java index 60ef03d89d17b..fe4d6bc83f04a 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java @@ -121,7 +121,7 @@ public static void main(String[] args) { // $example off$ userRecs.show(); movieRecs.show(); - + spark.stop(); } } diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index 706856b5215e4..95859c52c2aeb 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -124,7 +124,11 @@ private static void runBasicDataSourceExample(SparkSession spark) { peopleDF.write().bucketBy(42, "name").sortBy("age").saveAsTable("people_bucketed"); // $example off:write_sorting_and_bucketing$ // $example on:write_partitioning$ - usersDF.write().partitionBy("favorite_color").format("parquet").save("namesPartByColor.parquet"); + usersDF + .write() + .partitionBy("favorite_color") + .format("parquet") + .save("namesPartByColor.parquet"); // $example off:write_partitioning$ // $example on:write_partition_and_bucket$ peopleDF diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java b/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java index 8410abd14fd59..2800b3068f87b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/streaming/OutputMode.java @@ -17,7 +17,6 @@ package org.apache.spark.sql.streaming; -import org.apache.spark.annotation.Experimental; import org.apache.spark.annotation.InterfaceStability; import org.apache.spark.sql.catalyst.streaming.InternalOutputModes; From cf10fa88c41119c1bdd61bbb281de5e16055227f Mon Sep 17 00:00:00 2001 From: sharkdtu Date: Mon, 19 Jun 2017 14:54:54 -0700 Subject: [PATCH 1018/1204] [SPARK-21138][YARN] Cannot delete staging dir when the clusters of "spark.yarn.stagingDir" and "spark.hadoop.fs.defaultFS" are different MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? When I set different clusters for "spark.hadoop.fs.defaultFS" and "spark.yarn.stagingDir" as follows: ``` spark.hadoop.fs.defaultFS hdfs://tl-nn-tdw.tencent-distribute.com:54310 spark.yarn.stagingDir hdfs://ss-teg-2-v2/tmp/spark ``` The staging dir can not be deleted, it will prompt following message: ``` java.lang.IllegalArgumentException: Wrong FS: hdfs://ss-teg-2-v2/tmp/spark/.sparkStaging/application_1496819138021_77618, expected: hdfs://tl-nn-tdw.tencent-distribute.com:54310 ``` ## How was this patch tested? Existing tests Author: sharkdtu Closes #18352 from sharkdtu/master. 
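The essence of the fix as a standalone sketch (`deleteStagingDir` is an illustrative helper; the real change is in `ApplicationMaster.cleanupStagingDir` below): resolve the `FileSystem` from the staging path itself rather than from the default filesystem.

```
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

def deleteStagingDir(stagingDir: String, hadoopConf: Configuration): Unit = {
  val stagingDirPath = new Path(stagingDir)
  // Before: FileSystem.get(hadoopConf) returned the default filesystem, which throws
  // "Wrong FS" when spark.yarn.stagingDir points at a different cluster.
  // After: derive the filesystem from the path being deleted.
  val fs = stagingDirPath.getFileSystem(hadoopConf)
  fs.delete(stagingDirPath, true)
}
```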
(cherry picked from commit 3d4d11a80fe8953d48d8bfac2ce112e37d38dc90) Signed-off-by: Marcelo Vanzin --- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 864c834d110fd..fb53242616bc1 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -209,8 +209,6 @@ private[spark] class ApplicationMaster( logInfo("ApplicationAttemptId: " + appAttemptId) - val fs = FileSystem.get(yarnConf) - // This shutdown hook should run *after* the SparkContext is shut down. val priority = ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY - 1 ShutdownHookManager.addShutdownHook(priority) { () => @@ -232,7 +230,7 @@ private[spark] class ApplicationMaster( // we only want to unregister if we don't want the RM to retry if (finalStatus == FinalApplicationStatus.SUCCEEDED || isLastAttempt) { unregister(finalStatus, finalMsg) - cleanupStagingDir(fs) + cleanupStagingDir() } } } @@ -530,7 +528,7 @@ private[spark] class ApplicationMaster( /** * Clean up the staging directory. */ - private def cleanupStagingDir(fs: FileSystem) { + private def cleanupStagingDir(): Unit = { var stagingDirPath: Path = null try { val preserveFiles = sparkConf.get(PRESERVE_STAGING_FILES) @@ -541,6 +539,7 @@ private[spark] class ApplicationMaster( return } logInfo("Deleting staging directory " + stagingDirPath) + val fs = stagingDirPath.getFileSystem(yarnConf) fs.delete(stagingDirPath, true) } } catch { From 7799f35dd25b700f50915ba7371d9deb4df86df5 Mon Sep 17 00:00:00 2001 From: sharkdtu Date: Mon, 19 Jun 2017 14:54:54 -0700 Subject: [PATCH 1019/1204] [SPARK-21138][YARN] Cannot delete staging dir when the clusters of "spark.yarn.stagingDir" and "spark.hadoop.fs.defaultFS" are different MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? When I set different clusters for "spark.hadoop.fs.defaultFS" and "spark.yarn.stagingDir" as follows: ``` spark.hadoop.fs.defaultFS hdfs://tl-nn-tdw.tencent-distribute.com:54310 spark.yarn.stagingDir hdfs://ss-teg-2-v2/tmp/spark ``` The staging dir can not be deleted, it will prompt following message: ``` java.lang.IllegalArgumentException: Wrong FS: hdfs://ss-teg-2-v2/tmp/spark/.sparkStaging/application_1496819138021_77618, expected: hdfs://tl-nn-tdw.tencent-distribute.com:54310 ``` ## How was this patch tested? Existing tests Author: sharkdtu Closes #18352 from sharkdtu/master. 
(cherry picked from commit 3d4d11a80fe8953d48d8bfac2ce112e37d38dc90) Signed-off-by: Marcelo Vanzin --- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index aabae140af8b1..71892e0947dbf 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -207,8 +207,6 @@ private[spark] class ApplicationMaster( logInfo("ApplicationAttemptId: " + appAttemptId) - val fs = FileSystem.get(yarnConf) - // This shutdown hook should run *after* the SparkContext is shut down. val priority = ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY - 1 ShutdownHookManager.addShutdownHook(priority) { () => @@ -230,7 +228,7 @@ private[spark] class ApplicationMaster( // we only want to unregister if we don't want the RM to retry if (finalStatus == FinalApplicationStatus.SUCCEEDED || isLastAttempt) { unregister(finalStatus, finalMsg) - cleanupStagingDir(fs) + cleanupStagingDir() } } } @@ -531,7 +529,7 @@ private[spark] class ApplicationMaster( /** * Clean up the staging directory. */ - private def cleanupStagingDir(fs: FileSystem) { + private def cleanupStagingDir(): Unit = { var stagingDirPath: Path = null try { val preserveFiles = sparkConf.get(PRESERVE_STAGING_FILES) @@ -542,6 +540,7 @@ private[spark] class ApplicationMaster( return } logInfo("Deleting staging directory " + stagingDirPath) + val fs = stagingDirPath.getFileSystem(yarnConf) fs.delete(stagingDirPath, true) } } catch { From 8bf7f1ebaff06f8cf394b0475bd4222dc0d38e22 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 20 Jun 2017 09:22:30 +0800 Subject: [PATCH 1020/1204] [SPARK-21133][CORE] Fix HighlyCompressedMapStatus#writeExternal throws NPE ## What changes were proposed in this pull request? 
Fix HighlyCompressedMapStatus#writeExternal NPE: ``` 17/06/18 15:00:27 ERROR Utils: Exception encountered java.lang.NullPointerException at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply$mcV$sp(MapStatus.scala:171) at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply(MapStatus.scala:167) at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply(MapStatus.scala:167) at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1303) at org.apache.spark.scheduler.HighlyCompressedMapStatus.writeExternal(MapStatus.scala:167) at java.io.ObjectOutputStream.writeExternalData(ObjectOutputStream.java:1459) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1430) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1378) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply$mcV$sp(MapOutputTracker.scala:617) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1337) at org.apache.spark.MapOutputTracker$.serializeMapStatuses(MapOutputTracker.scala:619) at org.apache.spark.MapOutputTrackerMaster.getSerializedMapOutputStatuses(MapOutputTracker.scala:562) at org.apache.spark.MapOutputTrackerMaster$MessageLoop.run(MapOutputTracker.scala:351) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/06/18 15:00:27 ERROR MapOutputTrackerMaster: java.lang.NullPointerException java.io.IOException: java.lang.NullPointerException at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1310) at org.apache.spark.scheduler.HighlyCompressedMapStatus.writeExternal(MapStatus.scala:167) at java.io.ObjectOutputStream.writeExternalData(ObjectOutputStream.java:1459) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1430) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1378) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply$mcV$sp(MapOutputTracker.scala:617) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1337) at org.apache.spark.MapOutputTracker$.serializeMapStatuses(MapOutputTracker.scala:619) at org.apache.spark.MapOutputTrackerMaster.getSerializedMapOutputStatuses(MapOutputTracker.scala:562) at org.apache.spark.MapOutputTrackerMaster$MessageLoop.run(MapOutputTracker.scala:351) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: 
java.lang.NullPointerException at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply$mcV$sp(MapStatus.scala:171) at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply(MapStatus.scala:167) at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply(MapStatus.scala:167) at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1303) ... 17 more 17/06/18 15:00:27 INFO MapOutputTrackerMasterEndpoint: Asked to send map output locations for shuffle 0 to 10.17.47.20:50188 17/06/18 15:00:27 ERROR Utils: Exception encountered java.lang.NullPointerException at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply$mcV$sp(MapStatus.scala:171) at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply(MapStatus.scala:167) at org.apache.spark.scheduler.HighlyCompressedMapStatus$$anonfun$writeExternal$2.apply(MapStatus.scala:167) at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1303) at org.apache.spark.scheduler.HighlyCompressedMapStatus.writeExternal(MapStatus.scala:167) at java.io.ObjectOutputStream.writeExternalData(ObjectOutputStream.java:1459) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1430) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1378) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply$mcV$sp(MapOutputTracker.scala:617) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616) at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1337) at org.apache.spark.MapOutputTracker$.serializeMapStatuses(MapOutputTracker.scala:619) at org.apache.spark.MapOutputTrackerMaster.getSerializedMapOutputStatuses(MapOutputTracker.scala:562) at org.apache.spark.MapOutputTrackerMaster$MessageLoop.run(MapOutputTracker.scala:351) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) ``` ## How was this patch tested? manual tests Author: Yuming Wang Closes #18343 from wangyum/SPARK-21133. 
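The failure is straightforward to reproduce outside the suite; the snippet below is essentially the regression test added in this patch, run standalone (more than 2000 reduce partitions forces `HighlyCompressedMapStatus`, and Kryo is needed to hit the serialization path):

```
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.serializer.KryoSerializer

val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("SPARK-21133-repro")
  .set("spark.serializer", classOf[KryoSerializer].getName)
val sc = new SparkContext(conf)
try {
  // 2001 > 2000 partitions, so map output statuses use HighlyCompressedMapStatus.
  val count = sc.parallelize(0 until 3000, 10).repartition(2001).collect().length
  assert(count == 3000)
} finally {
  sc.stop()
}
```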
(cherry picked from commit 9b57cd8d5c594731a7b3c90ce59bcddb05193d79) Signed-off-by: Wenchen Fan --- .../org/apache/spark/scheduler/MapStatus.scala | 2 +- .../spark/serializer/KryoSerializer.scala | 1 + .../spark/scheduler/MapStatusSuite.scala | 18 ++++++++++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 048e0d0186594..5e45b375ddd45 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -141,7 +141,7 @@ private[spark] class HighlyCompressedMapStatus private ( private[this] var numNonEmptyBlocks: Int, private[this] var emptyBlocks: RoaringBitmap, private[this] var avgSize: Long, - @transient private var hugeBlockSizes: Map[Int, Byte]) + private var hugeBlockSizes: Map[Int, Byte]) extends MapStatus with Externalizable { // loc could be null when the default constructor is called during deserialization diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index e15166d11c243..4f03e54e304f6 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -175,6 +175,7 @@ class KryoSerializer(conf: SparkConf) kryo.register(None.getClass) kryo.register(Nil.getClass) kryo.register(Utils.classForName("scala.collection.immutable.$colon$colon")) + kryo.register(Utils.classForName("scala.collection.immutable.Map$EmptyMap$")) kryo.register(classOf[ArrayBuffer[Any]]) kryo.setClassLoader(classLoader) diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala index 3ec37f674c77b..e6120139f4958 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -24,9 +24,9 @@ import scala.util.Random import org.mockito.Mockito._ import org.roaringbitmap.RoaringBitmap -import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkFunSuite} import org.apache.spark.internal.config -import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.storage.BlockManagerId class MapStatusSuite extends SparkFunSuite { @@ -154,4 +154,18 @@ class MapStatusSuite extends SparkFunSuite { case part => assert(status2.getSizeForBlock(part) >= sizes(part)) } } + + test("SPARK-21133 HighlyCompressedMapStatus#writeExternal throws NPE") { + val conf = new SparkConf() + .set("spark.serializer", classOf[KryoSerializer].getName) + .setMaster("local") + .setAppName("SPARK-21133") + val sc = new SparkContext(conf) + try { + val count = sc.parallelize(0 until 3000, 10).repartition(2001).collect().length + assert(count === 3000) + } finally { + sc.stop() + } + } } From 514a7e6f8a11801c0c1e040796816f154480e75e Mon Sep 17 00:00:00 2001 From: "Joseph K. Bradley" Date: Mon, 19 Jun 2017 23:04:17 -0700 Subject: [PATCH 1021/1204] [SPARK-20929][ML] LinearSVC should use its own threshold param ## What changes were proposed in this pull request? LinearSVC should use its own threshold param, rather than the shared one, since it applies to rawPrediction instead of probability. 
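In plain terms (a hedged sketch in ordinary Scala; `predict` below is illustrative, not the actual `LinearSVCModel` code), the decision rule the param controls is:

```
// The threshold cuts the raw margin w·x + b, so it can be any real number,
// unlike a probability threshold restricted to [0, 1].
def predict(weights: Array[Double], intercept: Double,
            features: Array[Double], threshold: Double): Double = {
  val rawMargin = weights.zip(features).map { case (w, x) => w * x }.sum + intercept
  if (rawMargin > threshold) 1.0 else 0.0
}
// threshold = Double.PositiveInfinity => every prediction is 0.0
// threshold = Double.NegativeInfinity => every prediction is 1.0
```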
This PR changes the param in the Scala, Python and R APIs. ## How was this patch tested? New unit test to make sure the threshold can be set to any Double value. Author: Joseph K. Bradley Closes #18151 from jkbradley/ml-2.2-linearsvc-cleanup. (cherry picked from commit cc67bd573264c9046c4a034927ed8deb2a732110) Signed-off-by: Joseph K. Bradley --- R/pkg/R/mllib_classification.R | 4 ++- .../spark/ml/classification/LinearSVC.scala | 25 +++++++++++-- .../ml/classification/LinearSVCSuite.scala | 35 ++++++++++++++++++- python/pyspark/ml/classification.py | 20 ++++++++++- 4 files changed, 79 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R index 306a9b8676539..bdcc0818d139d 100644 --- a/R/pkg/R/mllib_classification.R +++ b/R/pkg/R/mllib_classification.R @@ -62,7 +62,9 @@ setClass("NaiveBayesModel", representation(jobj = "jobj")) #' of models will be always returned on the original scale, so it will be transparent for #' users. Note that with/without standardization, the models should be always converged #' to the same solution when no regularization is applied. -#' @param threshold The threshold in binary classification, in range [0, 1]. +#' @param threshold The threshold in binary classification applied to the linear model prediction. +#' This threshold can be any real number, where Inf will make all predictions 0.0 +#' and -Inf will make all predictions 1.0. #' @param weightCol The weight column name. #' @param aggregationDepth The depth for treeAggregate (greater than or equal to 2). If the dimensions of features #' or the number of partitions are large, this param could be adjusted to a larger size. diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 9900fbc9edda7..d6ed6a4570a4a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,23 @@ import org.apache.spark.sql.functions.{col, lit} /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasThreshold with HasAggregationDepth + with HasAggregationDepth { + + /** + * Param for threshold in binary classification prediction. + * For LinearSVC, this threshold is applied to the rawPrediction, rather than a probability. + * This threshold can be any real number, where Inf will make all predictions 0.0 + * and -Inf will make all predictions 1.0. + * Default: 0.0 + * + * @group param + */ + final val threshold: DoubleParam = new DoubleParam(this, "threshold", + "threshold in binary classification prediction applied to rawPrediction") + + /** @group getParam */ + def getThreshold: Double = $(threshold) +} /** * :: Experimental :: @@ -126,7 +142,7 @@ class LinearSVC @Since("2.2.0") ( def setWeightCol(value: String): this.type = set(weightCol, value) /** - * Set threshold in binary classification, in range [0, 1]. + * Set threshold in binary classification. 
* * @group setParam */ @@ -284,6 +300,7 @@ class LinearSVCModel private[classification] ( @Since("2.2.0") def setThreshold(value: Double): this.type = set(threshold, value) + setDefault(threshold, 0.0) @Since("2.2.0") def setWeightCol(value: Double): this.type = set(threshold, value) @@ -301,6 +318,10 @@ class LinearSVCModel private[classification] ( Vectors.dense(-m, m) } + override protected def raw2prediction(rawPrediction: Vector): Double = { + if (rawPrediction(1) > $(threshold)) 1.0 else 0.0 + } + @Since("2.2.0") override def copy(extra: ParamMap): LinearSVCModel = { copyValues(new LinearSVCModel(uid, coefficients, intercept), extra).setParent(parent) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index 2f87afc23fe7e..f2b00d0bae1d6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.LinearSVCSuite._ import org.apache.spark.ml.feature.{Instance, LabeledPoint} import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} -import org.apache.spark.ml.param.ParamsSuite +import org.apache.spark.ml.param.{ParamMap, ParamsSuite} import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.util.MLlibTestSparkContext @@ -127,6 +127,39 @@ class LinearSVCSuite extends SparkFunSuite with MLlibTestSparkContext with Defau MLTestingUtils.checkCopyAndUids(lsvc, model) } + test("LinearSVC threshold acts on rawPrediction") { + val lsvc = + new LinearSVCModel(uid = "myLSVCM", coefficients = Vectors.dense(1.0), intercept = 0.0) + val df = spark.createDataFrame(Seq( + (1, Vectors.dense(1e-7)), + (0, Vectors.dense(0.0)), + (-1, Vectors.dense(-1e-7)))).toDF("id", "features") + + def checkOneResult( + model: LinearSVCModel, + threshold: Double, + expected: Set[(Int, Double)]): Unit = { + model.setThreshold(threshold) + val results = model.transform(df).select("id", "prediction").collect() + .map(r => (r.getInt(0), r.getDouble(1))) + .toSet + assert(results === expected, s"Failed for threshold = $threshold") + } + + def checkResults(threshold: Double, expected: Set[(Int, Double)]): Unit = { + // Check via code path using Classifier.raw2prediction + lsvc.setRawPredictionCol("rawPrediction") + checkOneResult(lsvc, threshold, expected) + // Check via code path using Classifier.predict + lsvc.setRawPredictionCol("") + checkOneResult(lsvc, threshold, expected) + } + + checkResults(0.0, Set((1, 1.0), (0, 0.0), (-1, 0.0))) + checkResults(Double.PositiveInfinity, Set((1, 0.0), (0, 0.0), (-1, 0.0))) + checkResults(Double.NegativeInfinity, Set((1, 1.0), (0, 1.0), (-1, 1.0))) + } + test("linear svc doesn't fit intercept when fitIntercept is off") { val lsvc = new LinearSVC().setFitIntercept(false).setMaxIter(5) val model = lsvc.fit(smallBinaryDataset) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 60bdeedd6a144..9b345ac73f3d9 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -63,7 +63,7 @@ def numClasses(self): @inherit_doc class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, HasRegParam, HasTol, HasRawPredictionCol, HasFitIntercept, HasStandardization, - 
HasThreshold, HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable): + HasWeightCol, HasAggregationDepth, JavaMLWritable, JavaMLReadable): """ .. note:: Experimental @@ -109,6 +109,12 @@ class LinearSVC(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, Ha .. versionadded:: 2.2.0 """ + threshold = Param(Params._dummy(), "threshold", + "The threshold in binary classification applied to the linear model" + " prediction. This threshold can be any real number, where Inf will make" + " all predictions 0.0 and -Inf will make all predictions 1.0.", + typeConverter=TypeConverters.toFloat) + @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", @@ -147,6 +153,18 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre def _create_model(self, java_model): return LinearSVCModel(java_model) + def setThreshold(self, value): + """ + Sets the value of :py:attr:`threshold`. + """ + return self._set(threshold=value) + + def getThreshold(self): + """ + Gets the value of threshold or its default value. + """ + return self.getOrDefault(self.threshold) + class LinearSVCModel(JavaModel, JavaClassificationModel, JavaMLWritable, JavaMLReadable): """ From b8b80f6dea86d4e4a648b86e38936d3a82ffc0aa Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 20 Jun 2017 09:15:33 -0700 Subject: [PATCH 1022/1204] [SPARK-21150][SQL] Persistent view stored in Hive metastore should be case preserving ## What changes were proposed in this pull request? This is a regression in Spark 2.2. In Spark 2.2, we introduced a new way to resolve persisted view: https://issues.apache.org/jira/browse/SPARK-18209 , but this makes the persisted view non case-preserving because we store the schema in hive metastore directly. We should follow data source table and store schema in table properties. ## How was this patch tested? new regression test Author: Wenchen Fan Closes #18360 from cloud-fan/view. (cherry picked from commit e862dc904963cf7832bafc1d3d0ea9090bbddd81) Signed-off-by: gatorsmile --- .../spark/sql/execution/command/views.scala | 4 +- .../spark/sql/execution/SQLViewSuite.scala | 10 +++ .../spark/sql/hive/HiveExternalCatalog.scala | 84 ++++++++++--------- 3 files changed, 56 insertions(+), 42 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 00f0acab21aa2..3518ee581c5fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -159,7 +159,9 @@ case class CreateViewCommand( checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) // Handles `CREATE OR REPLACE VIEW v0 AS SELECT ...` - catalog.alterTable(prepareTable(sparkSession, analyzedPlan)) + // Nothing we need to retain from the old view, so just drop and create a new one + catalog.dropTable(viewIdent, ignoreIfNotExists = false, purge = false) + catalog.createTable(prepareTable(sparkSession, analyzedPlan), ignoreIfExists = false) } else { // Handles `CREATE VIEW v0 AS SELECT ...`. Throws exception when the target view already // exists. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index d32716c18ddfb..6761f05bb462a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -669,4 +669,14 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { "positive.")) } } + + test("permanent view should be case-preserving") { + withView("v") { + sql("CREATE VIEW v AS SELECT 1 as aBc") + assert(spark.table("v").schema.head.name == "aBc") + + sql("CREATE OR REPLACE VIEW v AS SELECT 2 as cBa") + assert(spark.table("v").schema.head.name == "cBa") + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index a03beb72b520c..f2fe22714b24f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -224,39 +224,36 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat throw new TableAlreadyExistsException(db = db, table = table) } - if (tableDefinition.tableType == VIEW) { - client.createTable(tableDefinition, ignoreIfExists) + // Ideally we should not create a managed table with location, but Hive serde table can + // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have + // to create the table directory and write out data before we create this table, to avoid + // exposing a partial written table. + val needDefaultTableLocation = tableDefinition.tableType == MANAGED && + tableDefinition.storage.locationUri.isEmpty + + val tableLocation = if (needDefaultTableLocation) { + Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier))) } else { - // Ideally we should not create a managed table with location, but Hive serde table can - // specify location for managed table. And in [[CreateDataSourceTableAsSelectCommand]] we have - // to create the table directory and write out data before we create this table, to avoid - // exposing a partial written table. - val needDefaultTableLocation = tableDefinition.tableType == MANAGED && - tableDefinition.storage.locationUri.isEmpty - - val tableLocation = if (needDefaultTableLocation) { - Some(CatalogUtils.stringToURI(defaultTablePath(tableDefinition.identifier))) - } else { - tableDefinition.storage.locationUri - } + tableDefinition.storage.locationUri + } - if (DDLUtils.isHiveTable(tableDefinition)) { - val tableWithDataSourceProps = tableDefinition.copy( - // We can't leave `locationUri` empty and count on Hive metastore to set a default table - // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default - // table location for tables in default database, while we expect to use the location of - // default database. - storage = tableDefinition.storage.copy(locationUri = tableLocation), - // Here we follow data source tables and put table metadata like table schema, partition - // columns etc. in table properties, so that we can work around the Hive metastore issue - // about not case preserving and make Hive serde table support mixed-case column names. 
- properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition)) - client.createTable(tableWithDataSourceProps, ignoreIfExists) - } else { - createDataSourceTable( - tableDefinition.withNewStorage(locationUri = tableLocation), - ignoreIfExists) - } + if (DDLUtils.isDatasourceTable(tableDefinition)) { + createDataSourceTable( + tableDefinition.withNewStorage(locationUri = tableLocation), + ignoreIfExists) + } else { + val tableWithDataSourceProps = tableDefinition.copy( + // We can't leave `locationUri` empty and count on Hive metastore to set a default table + // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default + // table location for tables in default database, while we expect to use the location of + // default database. + storage = tableDefinition.storage.copy(locationUri = tableLocation), + // Here we follow data source tables and put table metadata like table schema, partition + // columns etc. in table properties, so that we can work around the Hive metastore issue + // about not case preserving and make Hive serde table and view support mixed-case column + // names. + properties = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition)) + client.createTable(tableWithDataSourceProps, ignoreIfExists) } } @@ -669,16 +666,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat var table = inputTable - if (table.tableType != VIEW) { - table.properties.get(DATASOURCE_PROVIDER) match { - // No provider in table properties, which means this is a Hive serde table. - case None => - table = restoreHiveSerdeTable(table) - - // This is a regular data source table. - case Some(provider) => - table = restoreDataSourceTable(table, provider) - } + table.properties.get(DATASOURCE_PROVIDER) match { + case None if table.tableType == VIEW => + // If this is a view created by Spark 2.2 or higher versions, we should restore its schema + // from table properties. + if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) { + table = table.copy(schema = getSchemaFromTableProperties(table)) + } + + // No provider in table properties, which means this is a Hive serde table. + case None => + table = restoreHiveSerdeTable(table) + + // This is a regular data source table. 
+ case Some(provider) => + table = restoreDataSourceTable(table, provider) } // construct Spark's statistics from information in Hive metastore From 62e442e73a2fa663892d2edaff5f7d72d7f402ed Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 20 Jun 2017 10:56:51 -0700 Subject: [PATCH 1023/1204] Preparing Spark release v2.2.0-rc5 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 39 insertions(+), 39 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index cfa49b94c9526..879c1f80f2c5d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.1 +Version: 2.2.0 Title: R Frontend for Apache Spark Description: The SparkR package provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index da7b0c9d1b933..3a7003f5e94f5 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7577253dd0390..5e9ffd13c61aa 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 558864ae4faab..c3e10d1f289e1 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index de66617d2fa27..e66a8b49de065 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 076d98af834da..1a1f6526ee8e1 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index e74d84a5b3b96..525ece5be4853 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 76783abe36a2c..e7c33264fcdbd 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 254a9b9ac3184..6102f6f45a60c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index b61455e3ef9c8..9d8e192cec9ee 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 2.2.1-SNAPSHOT -SPARK_VERSION_SHORT: 2.2.1 +SPARK_VERSION: 2.2.0 +SPARK_VERSION_SHORT: 2.2.0 SCALA_BINARY_VERSION: "2.11" SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 diff --git a/examples/pom.xml b/examples/pom.xml index 0d001ee478c14..f3d751383c249 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 04afe28fb7885..3f93a33084348 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 47e03419d3df7..7e2d58f1d073a 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index f961a8f54d9a6..26418f9769a25 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index d8bc7dcf75248..58057616174eb 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 6d46430d6e969..27b1bfcfa7dcc 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 5d979ddf2f74c..6bcbb612fef77 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index e4336ecb07da7..2df99403840ee 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 2489d29ebe160..0e93b75f67ca1 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 98f81aee376a0..e17b960c9a5b8 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 88515f853edbc..73852fc4c7656 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 28797e3fe4328..2f761fbcda2d4 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 701455f226094..22fe1dca3343e 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 1ed38a794f44c..df69c5e58727a 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a4bb50ce7dda0..d3cb2dce3fab5 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 16cce0a49653e..996763ad6c256 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index fec1be9099460..af032ed035f97 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/pom.xml b/pom.xml index ccd8546a269c1..7a3be5baea16c 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 pom Spark Project Parent POM http://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index c0bb1968b4b99..e5ec547714d8c 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.1.dev0" +__version__ = "2.2.0" diff --git a/repl/pom.xml b/repl/pom.xml index f3c49dfb00603..2a5d2f4354ecf 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 547836050a610..f94ff4e925e08 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index e00ed33d2ba17..72f891f7c10bd 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5ecee28a1f0b8..722e362943e26 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c9ac366ed6e62..84c82f6b86ef8 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 0c344fa4975e8..ab5593da0d655 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 3dca866307232..f0ef6779a4742 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 604007c6feaa1..bed07015e4540 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index b2e8e469d197c..19b44577ca124 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml From e88349873b3678045741c82009f36d7fe66d29ee Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Tue, 20 Jun 2017 10:56:55 -0700 Subject: [PATCH 1024/1204] Preparing development version 2.2.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 39 insertions(+), 39 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 879c1f80f2c5d..cfa49b94c9526 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.0 +Version: 2.2.1 Title: R Frontend for Apache Spark Description: The SparkR package provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 3a7003f5e94f5..da7b0c9d1b933 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 5e9ffd13c61aa..7577253dd0390 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c3e10d1f289e1..558864ae4faab 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index e66a8b49de065..de66617d2fa27 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 1a1f6526ee8e1..076d98af834da 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 525ece5be4853..e74d84a5b3b96 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e7c33264fcdbd..76783abe36a2c 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 6102f6f45a60c..254a9b9ac3184 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9d8e192cec9ee..b61455e3ef9c8 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 2.2.0 -SPARK_VERSION_SHORT: 2.2.0 +SPARK_VERSION: 2.2.1-SNAPSHOT +SPARK_VERSION_SHORT: 2.2.1 SCALA_BINARY_VERSION: "2.11" SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 diff --git a/examples/pom.xml b/examples/pom.xml index f3d751383c249..0d001ee478c14 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 3f93a33084348..04afe28fb7885 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 7e2d58f1d073a..47e03419d3df7 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 26418f9769a25..f961a8f54d9a6 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 58057616174eb..d8bc7dcf75248 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 27b1bfcfa7dcc..6d46430d6e969 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 6bcbb612fef77..5d979ddf2f74c 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2df99403840ee..e4336ecb07da7 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 0e93b75f67ca1..2489d29ebe160 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index e17b960c9a5b8..98f81aee376a0 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 73852fc4c7656..88515f853edbc 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 2f761fbcda2d4..28797e3fe4328 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 22fe1dca3343e..701455f226094 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index df69c5e58727a..1ed38a794f44c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index d3cb2dce3fab5..a4bb50ce7dda0 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 996763ad6c256..16cce0a49653e 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index af032ed035f97..fec1be9099460 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 7a3be5baea16c..ccd8546a269c1 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index e5ec547714d8c..c0bb1968b4b99 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.0" +__version__ = "2.2.1.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 2a5d2f4354ecf..f3c49dfb00603 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f94ff4e925e08..547836050a610 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 72f891f7c10bd..e00ed33d2ba17 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 722e362943e26..5ecee28a1f0b8 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 84c82f6b86ef8..c9ac366ed6e62 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index ab5593da0d655..0c344fa4975e8 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index f0ef6779a4742..3dca866307232 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 
2.2.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index bed07015e4540..604007c6feaa1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 19b44577ca124..b2e8e469d197c 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml From 8923bac1e895e57ce2d9ef6aea31e13e390be6da Mon Sep 17 00:00:00 2001 From: assafmendelson Date: Tue, 20 Jun 2017 13:07:51 -0700 Subject: [PATCH 1025/1204] [SPARK-21123][DOCS][STRUCTURED STREAMING] Options for file stream source are in a wrong table - version to fix 2.1 ## What changes were proposed in this pull request? The description for several options of File Source for structured streaming appeared in the File Sink description instead. This commit continues on PR #18342 and targets the fixes for the documentation of version spark version 2.1 ## How was this patch tested? Built the documentation by SKIP_API=1 jekyll build and visually inspected the structured streaming programming guide. zsxwing This is the PR to fix version 2.1 as discussed in PR #18342 Author: assafmendelson Closes #18363 from assafmendelson/spark-21123-for-spark2.1. --- docs/structured-streaming-programming-guide.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index da5c2344a1b18..e536f5d8ecbd5 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -449,6 +449,10 @@ Here are the details of all the sources in Spark. File source path: path to the input directory, and common to all file formats. +
    + maxFilesPerTrigger: maximum number of new files to be considered in every trigger (default: no max) +
    + latestFirst: whether to process the latest new files first, useful when there is a large backlog of files (default: false)

    For file-format-specific options, see the related methods in DataStreamReader (Scala/Java/Python). @@ -1076,9 +1080,6 @@ Here are the details of all the sinks in Spark. Append path: path to the output directory, must be specified. - maxFilesPerTrigger: maximum number of new files to be considered in every trigger (default: no max) -
    - latestFirst: whether to processs the latest new files first, useful when there is a large backlog of files(default: false)

    For file-format-specific options, see the related methods in DataFrameWriter (Scala/Java/Python). From 529c04f01fa776d281a7f85ac30e5f38d6a31439 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 21 Jun 2017 15:30:31 +0100 Subject: [PATCH 1026/1204] [MINOR][DOCS] Add lost tag for configuration.md ## What changes were proposed in this pull request? Add lost `` tag for `configuration.md`. ## How was this patch tested? N/A Author: Yuming Wang Closes #18372 from wangyum/docs-missing-tr. (cherry picked from commit 987eb8faddbb533e006c769d382a3e4fda3dd6ee) Signed-off-by: Sean Owen --- docs/configuration.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 8d5d55ca0429b..6a00ad14e3bf6 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1543,6 +1543,8 @@ Apart from these, the following properties are also available, and may be useful of this setting is to act as a safety-net to prevent runaway uncancellable tasks from rendering an executor unusable. + + spark.stage.maxConsecutiveAttempts 4 From 6b37c863848d5991821aa2c3233754ca3061f4d6 Mon Sep 17 00:00:00 2001 From: ALeksander Eskilson Date: Thu, 22 Jun 2017 13:23:59 +0800 Subject: [PATCH 1027/1204] [SPARK-18016][SQL][CATALYST][BRANCH-2.1] Code Generation: Constant Pool Limit - Class Splitting ## What changes were proposed in this pull request? This is a backport patch for Spark 2.1.x of the class splitting feature over excess generated code as was merged in #18075. ## How was this patch tested? The same test provided in #18075 is included in this patch. Author: ALeksander Eskilson Closes #18354 from bdrillard/class_splitting_2.1. --- sql/catalyst/pom.xml | 7 + .../sql/catalyst/expressions/ScalaUDF.scala | 6 +- .../expressions/codegen/CodeGenerator.scala | 159 ++++++++++++++---- .../codegen/GenerateMutableProjection.scala | 17 +- .../codegen/GenerateOrdering.scala | 3 + .../codegen/GeneratePredicate.scala | 3 + .../codegen/GenerateSafeProjection.scala | 9 +- .../codegen/GenerateUnsafeProjection.scala | 9 +- .../expressions/complexTypeCreator.scala | 18 +- .../expressions/conditionalExpressions.scala | 4 +- .../expressions/objects/objects.scala | 2 +- .../codegen/GeneratedProjectionSuite.scala | 72 ++++++-- .../sql/execution/DataSourceScanExec.scala | 6 +- .../apache/spark/sql/execution/SortExec.scala | 4 +- .../sql/execution/WholeStageCodegenExec.scala | 3 + .../aggregate/HashAggregateExec.scala | 8 +- .../execution/basicPhysicalOperators.scala | 10 +- .../columnar/GenerateColumnAccessor.scala | 19 ++- .../execution/joins/SortMergeJoinExec.scala | 2 +- .../apache/spark/sql/execution/limit.scala | 2 +- 20 files changed, 267 insertions(+), 96 deletions(-) diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 4b4a8eb3815e1..66c0ff09ea4a2 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -126,6 +126,13 @@ + + org.scalatest + scalatest-maven-plugin + + -Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m + + org.antlr antlr4-maven-plugin diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 228f4b756c8b4..5c68f9ffc691c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -988,7 +988,7 @@ case class ScalaUDF( val converterTerm = ctx.freshName("converter") val 
expressionIdx = ctx.references.size - 1 ctx.addMutableState(converterClassName, converterTerm, - s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" + + s"$converterTerm = ($converterClassName)$typeConvertersClassName" + s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" + s"references[$expressionIdx]).getChildren().apply($index))).dataType());") converterTerm @@ -1005,7 +1005,7 @@ case class ScalaUDF( // Generate codes used to convert the returned value of user-defined functions to Catalyst type val catalystConverterTerm = ctx.freshName("catalystConverter") ctx.addMutableState(converterClassName, catalystConverterTerm, - s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + + s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + s".createToCatalystConverter($scalaUDF.dataType());") val resultTerm = ctx.freshName("result") @@ -1019,7 +1019,7 @@ case class ScalaUDF( val funcTerm = ctx.freshName("udf") ctx.addMutableState(funcClassName, funcTerm, - s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") + s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") // codegen for children expressions val evals = children.map(_.genCode(ctx)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 683b9cbb343c8..22ce3f7e7c52e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -109,7 +109,7 @@ class CodegenContext { val idx = references.length references += obj val clsName = Option(className).getOrElse(obj.getClass.getName) - addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];") + addMutableState(clsName, term, s"$term = ($clsName) references[$idx];") term } @@ -198,41 +198,139 @@ class CodegenContext { partitionInitializationStatements.mkString("\n") } + /** + * Holds expressions that are equivalent. Used to perform subexpression elimination + * during codegen. + * + * For expressions that appear more than once, generate additional code to prevent + * recomputing the value. + * + * For example, consider two expression generated from this SQL statement: + * SELECT (col1 + col2), (col1 + col2) / col3. + * + * equivalentExpressions will match the tree containing `col1 + col2` and it will only + * be evaluated once. + */ + val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions + + // Foreach expression that is participating in subexpression elimination, the state to use. + val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState] + + // The collection of sub-expression result resetting methods that need to be called on each row. + val subexprFunctions = mutable.ArrayBuffer.empty[String] + + private val outerClassName = "OuterClass" + /** - * Holding all the functions those will be added into generated class. + * Holds the class and instance names to be generated, where `OuterClass` is a placeholder + * standing for whichever class is generated as the outermost class and which will contain any + * nested sub-classes. All other classes and instance names in this list will represent private, + * nested sub-classes. 
*/ - val addedFunctions: mutable.Map[String, String] = - mutable.Map.empty[String, String] + private val classes: mutable.ListBuffer[(String, String)] = + mutable.ListBuffer[(String, String)](outerClassName -> null) + + // A map holding the current size in bytes of each class to be generated. + private val classSize: mutable.Map[String, Int] = + mutable.Map[String, Int](outerClassName -> 0) + + // Nested maps holding function names and their code belonging to each class. + private val classFunctions: mutable.Map[String, mutable.Map[String, String]] = + mutable.Map(outerClassName -> mutable.Map.empty[String, String]) - def addNewFunction(funcName: String, funcCode: String): Unit = { - addedFunctions += ((funcName, funcCode)) + // Returns the size of the most recently added class. + private def currClassSize(): Int = classSize(classes.head._1) + + // Returns the class name and instance name for the most recently added class. + private def currClass(): (String, String) = classes.head + + // Adds a new class. Requires the class' name, and its instance name. + private def addClass(className: String, classInstance: String): Unit = { + classes.prepend(className -> classInstance) + classSize += className -> 0 + classFunctions += className -> mutable.Map.empty[String, String] } /** - * Holds expressions that are equivalent. Used to perform subexpression elimination - * during codegen. - * - * For expressions that appear more than once, generate additional code to prevent - * recomputing the value. + * Adds a function to the generated class. If the code for the `OuterClass` grows too large, the + * function will be inlined into a new private, nested class, and a instance-qualified name for + * the function will be returned. Otherwise, the function will be inlined to the `OuterClass` the + * simple `funcName` will be returned. * - * For example, consider two expression generated from this SQL statement: - * SELECT (col1 + col2), (col1 + col2) / col3. - * - * equivalentExpressions will match the tree containing `col1 + col2` and it will only - * be evaluated once. + * @param funcName the class-unqualified name of the function + * @param funcCode the body of the function + * @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This + * can be necessary when a function is declared outside of the context + * it is eventually referenced and a returned qualified function name + * cannot otherwise be accessed. + * @return the name of the function, qualified by class if it will be inlined to a private, + * nested sub-class */ - val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions + def addNewFunction( + funcName: String, + funcCode: String, + inlineToOuterClass: Boolean = false): String = { + // The number of named constants that can exist in the class is limited by the Constant Pool + // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a + // threshold of 1600k bytes to determine when a function should be inlined to a private, nested + // sub-class. + val (className, classInstance) = if (inlineToOuterClass) { + outerClassName -> "" + } else if (currClassSize > 1600000) { + val className = freshName("NestedClass") + val classInstance = freshName("nestedClassInstance") + + addClass(className, classInstance) + + className -> classInstance + } else { + currClass() + } - // Foreach expression that is participating in subexpression elimination, the state to use. 
- val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState] + classSize(className) += funcCode.length + classFunctions(className) += funcName -> funcCode - // The collection of sub-expression result resetting methods that need to be called on each row. - val subexprFunctions = mutable.ArrayBuffer.empty[String] + if (className == outerClassName) { + funcName + } else { - def declareAddedFunctions(): String = { - addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString("\n") + s"$classInstance.$funcName" + } } + /** + * Instantiates all nested, private sub-classes as objects to the `OuterClass` + */ + private[sql] def initNestedClasses(): String = { + // Nested, private sub-classes have no mutable state (though they do reference the outer class' + // mutable state), so we declare and initialize them inline to the OuterClass. + classes.filter(_._1 != outerClassName).map { + case (className, classInstance) => + s"private $className $classInstance = new $className();" + }.mkString("\n") + } + + /** + * Declares all function code that should be inlined to the `OuterClass`. + */ + private[sql] def declareAddedFunctions(): String = { + classFunctions(outerClassName).values.mkString("\n") + } + + /** + * Declares all nested, private sub-classes and the function code that should be inlined to them. + */ + private[sql] def declareNestedClasses(): String = { + classFunctions.filterKeys(_ != outerClassName).map { + case (className, functions) => + s""" + |private class $className { + | ${functions.values.mkString("\n")} + |} + """.stripMargin + } + }.mkString("\n") + final val JAVA_BOOLEAN = "boolean" final val JAVA_BYTE = "byte" final val JAVA_SHORT = "short" @@ -552,8 +650,7 @@ class CodegenContext { return 0; } """ - addNewFunction(compareFunc, funcCode) - s"this.$compareFunc($c1, $c2)" + s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" case schema: StructType => val comparisons = GenerateOrdering.genComparisons(this, schema) val compareFunc = freshName("compareStruct") @@ -569,8 +666,7 @@ class CodegenContext { return 0; } """ - addNewFunction(compareFunc, funcCode) - s"this.$compareFunc($c1, $c2)" + s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)" case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2) case _ => @@ -640,7 +736,9 @@ class CodegenContext { /** * Splits the generated code of expressions into multiple functions, because function has - * 64kb code size limit in JVM + * 64kb code size limit in JVM. If the class to which the function would be inlined would grow + * beyond 1600kb, we declare a private, nested sub-class, and the function is inlined to it + * instead, because classes have a constant pool limit of 65,536 named values. * * @param expressions the codes to evaluate expressions. * @param funcName the split function name base. @@ -685,7 +783,6 @@ class CodegenContext { |} """.stripMargin addNewFunction(name, code) - name } foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) @@ -769,8 +866,6 @@ class CodegenContext { |} """.stripMargin - addNewFunction(fnName, fn) - // Add a state and a mapping of the common subexpressions that are associate with this // state. Adding this expression to subExprEliminationExprMap means it will call `fn` // when it is code generated. This decision should be a cost based one. 
@@ -791,7 +886,7 @@ class CodegenContext { addMutableState(javaType(expr.dataType), value, s"$value = ${defaultValue(expr.dataType)};") - subexprFunctions += s"$fnName($INPUT_ROW);" + subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" val state = SubExprEliminationState(isNull, value) e.foreach(subExprEliminationExprs.put(_, state)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 4d732445544a8..635766835029b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP if (e.nullable) { val isNull = s"isNull_$i" val value = s"value_$i" - ctx.addMutableState("boolean", isNull, s"this.$isNull = true;") + ctx.addMutableState("boolean", isNull, s"$isNull = true;") ctx.addMutableState(ctx.javaType(e.dataType), value, - s"this.$value = ${ctx.defaultValue(e.dataType)};") + s"$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - this.$isNull = ${ev.isNull}; - this.$value = ${ev.value}; + $isNull = ${ev.isNull}; + $value = ${ev.value}; """ } else { val value = s"value_$i" ctx.addMutableState(ctx.javaType(e.dataType), value, - s"this.$value = ${ctx.defaultValue(e.dataType)};") + s"$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - this.$value = ${ev.value}; + $value = ${ev.value}; """ } } @@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP val updates = validExpr.zip(index).map { case (e, i) => - val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i") + val ev = ExprCode("", s"isNull_$i", s"value_$i") ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable) } @@ -135,6 +135,9 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP $allUpdates return mutableRow; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index f7fc2d54a047b..a31943255b995 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -179,6 +179,9 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR $comparisons return 0; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index dcd1ed96a298e..b400783bb5e55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -72,6 +72,9 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] { ${eval.code} return 
!${eval.isNull} && ${eval.value}; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index b1cb6edefb852..f708aeff2b146 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val output = ctx.freshName("safeRow") val values = ctx.freshName("values") // These expressions could be split into multiple functions - ctx.addMutableState("Object[]", values, s"this.$values = null;") + ctx.addMutableState("Object[]", values, s"$values = null;") val rowClass = classOf[GenericInternalRow].getName @@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val allFields = ctx.splitExpressions(tmp, fieldWriters) val code = s""" final InternalRow $tmp = $input; - this.$values = new Object[${schema.length}]; + $values = new Object[${schema.length}]; $allFields final InternalRow $output = new $rowClass($values); - this.$values = null; + $values = null; """ ExprCode(code, "false", output) @@ -184,6 +184,9 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] $allExpressions return mutableRow; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index b358102d914bd..febfe3124f2bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val rowWriterClass = classOf[UnsafeRowWriter].getName val rowWriter = ctx.freshName("rowWriter") ctx.addMutableState(rowWriterClass, rowWriter, - s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") + s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") val resetWriter = if (isTopLevel) { // For top level row writer, it always writes to the beginning of the global buffer holder, @@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val arrayWriterClass = classOf[UnsafeArrayWriter].getName val arrayWriter = ctx.freshName("arrayWriter") ctx.addMutableState(arrayWriterClass, arrayWriter, - s"this.$arrayWriter = new $arrayWriterClass();") + s"$arrayWriter = new $arrayWriterClass();") val numElements = ctx.freshName("numElements") val index = ctx.freshName("index") val element = ctx.freshName("element") @@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val holder = ctx.freshName("holder") val holderClass = classOf[BufferHolder].getName ctx.addMutableState(holderClass, holder, - s"this.$holder = new $holderClass($result, ${numVarLenFields * 32});") + s"$holder = new $holderClass($result, 
${numVarLenFields * 32});") val resetBufferHolder = if (numVarLenFields == 0) { "" @@ -402,6 +402,9 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro ${eval.code.trim} return ${eval.value}; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 3df2ed8be0650..04e32bda6b0d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -58,10 +58,10 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val arrayClass = classOf[GenericArrayData].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"this.$values = null;") + ctx.addMutableState("Object[]", values, s"$values = null;") ev.copy(code = s""" - this.$values = new Object[${children.size}];""" + + $values = new Object[${children.size}];""" + ctx.splitExpressions( ctx.INPUT_ROW, children.zipWithIndex.map { case (e, i) => @@ -76,7 +76,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { }) + s""" final ArrayData ${ev.value} = new $arrayClass($values); - this.$values = null; + $values = null; """, isNull = "false") } @@ -137,8 +137,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression { val mapClass = classOf[ArrayBasedMapData].getName val keyArray = ctx.freshName("keyArray") val valueArray = ctx.freshName("valueArray") - ctx.addMutableState("Object[]", keyArray, s"this.$keyArray = null;") - ctx.addMutableState("Object[]", valueArray, s"this.$valueArray = null;") + ctx.addMutableState("Object[]", keyArray, s"$keyArray = null;") + ctx.addMutableState("Object[]", valueArray, s"$valueArray = null;") val keyData = s"new $arrayClass($keyArray)" val valueData = s"new $arrayClass($valueArray)" @@ -173,8 +173,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression { }) + s""" final MapData ${ev.value} = new $mapClass($keyData, $valueData); - this.$keyArray = null; - this.$valueArray = null; + $keyArray = null; + $valueArray = null; """, isNull = "false") } @@ -296,7 +296,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericInternalRow].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"this.$values = null;") + ctx.addMutableState("Object[]", values, s"$values = null;") ev.copy(code = s""" $values = new Object[${valExprs.size}];""" + @@ -313,7 +313,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc }) + s""" final InternalRow ${ev.value} = new $rowClass($values); - this.$values = null; + $values = null; """, isNull = "false") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index bacedec1ae203..092c5de08df70 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -131,8 +131,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi | $globalValue = ${ev.value}; |} """.stripMargin - ctx.addNewFunction(funcName, funcBody) - (funcName, globalIsNull, globalValue) + val fullFuncName = ctx.addNewFunction(funcName, funcBody) + (fullFuncName, globalIsNull, globalValue) } override def toString: String = s"if ($predicate) $trueValue else $falseValue" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 256de74d410e4..5009bf8e96e83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -912,7 +912,7 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp val code = s""" ${instanceGen.code} - this.${javaBeanInstance} = ${instanceGen.value}; + ${javaBeanInstance} = ${instanceGen.value}; if (!${instanceGen.isNull}) { $initializeCode } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index b69b74b4240bd..7bfdf550bc376 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -33,10 +33,10 @@ class GeneratedProjectionSuite extends SparkFunSuite { test("generated projections on wider table") { val N = 1000 - val wideRow1 = new GenericInternalRow((1 to N).toArray[Any]) + val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) val wideRow2 = new GenericInternalRow( - (1 to N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) + (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) val schema2 = StructType((1 to N).map(i => StructField("", StringType))) val joined = new JoinedRow(wideRow1, wideRow2) val joinedSchema = StructType(schema1 ++ schema2) @@ -48,12 +48,12 @@ class GeneratedProjectionSuite extends SparkFunSuite { val unsafeProj = UnsafeProjection.create(nestedSchema) val unsafe: UnsafeRow = unsafeProj(nested) (0 until N).foreach { i => - val s = UTF8String.fromString((i + 1).toString) - assert(i + 1 === unsafe.getInt(i + 2)) + val s = UTF8String.fromString(i.toString) + assert(i === unsafe.getInt(i + 2)) assert(s === unsafe.getUTF8String(i + 2 + N)) - assert(i + 1 === unsafe.getStruct(0, N * 2).getInt(i)) + assert(i === unsafe.getStruct(0, N * 2).getInt(i)) assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i + 1 === unsafe.getStruct(1, N * 2).getInt(i)) + assert(i === unsafe.getStruct(1, N * 2).getInt(i)) assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) } @@ -62,13 +62,63 @@ class GeneratedProjectionSuite extends SparkFunSuite { val result = safeProj(unsafe) // Can't compare GenericInternalRow with JoinedRow directly (0 until N).foreach { i => - val r = i + 1 - val s = UTF8String.fromString((i + 1).toString) - assert(r === result.getInt(i + 2)) + val s = UTF8String.fromString(i.toString) + assert(i === 
result.getInt(i + 2)) assert(s === result.getUTF8String(i + 2 + N)) - assert(r === result.getStruct(0, N * 2).getInt(i)) + assert(i === result.getStruct(0, N * 2).getInt(i)) assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) - assert(r === result.getStruct(1, N * 2).getInt(i)) + assert(i === result.getStruct(1, N * 2).getInt(i)) + assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) + } + + // test generated MutableProjection + val exprs = nestedSchema.fields.zipWithIndex.map { case (f, i) => + BoundReference(i, f.dataType, true) + } + val mutableProj = GenerateMutableProjection.generate(exprs) + val row1 = mutableProj(result) + assert(result === row1) + val row2 = mutableProj(result) + assert(result === row2) + } + + test("generated projections on wider table requiring class-splitting") { + val N = 4000 + val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) + val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) + val wideRow2 = new GenericInternalRow( + (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) + val schema2 = StructType((1 to N).map(i => StructField("", StringType))) + val joined = new JoinedRow(wideRow1, wideRow2) + val joinedSchema = StructType(schema1 ++ schema2) + val nested = new JoinedRow(InternalRow(joined, joined), joined) + val nestedSchema = StructType( + Seq(StructField("", joinedSchema), StructField("", joinedSchema)) ++ joinedSchema) + + // test generated UnsafeProjection + val unsafeProj = UnsafeProjection.create(nestedSchema) + val unsafe: UnsafeRow = unsafeProj(nested) + (0 until N).foreach { i => + val s = UTF8String.fromString(i.toString) + assert(i === unsafe.getInt(i + 2)) + assert(s === unsafe.getUTF8String(i + 2 + N)) + assert(i === unsafe.getStruct(0, N * 2).getInt(i)) + assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) + assert(i === unsafe.getStruct(1, N * 2).getInt(i)) + assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) + } + + // test generated SafeProjection + val safeProj = FromUnsafeProjection(nestedSchema) + val result = safeProj(unsafe) + // Can't compare GenericInternalRow with JoinedRow directly + (0 until N).foreach { i => + val s = UTF8String.fromString(i.toString) + assert(i === result.getInt(i + 2)) + assert(s === result.getUTF8String(i + 2 + N)) + assert(i === result.getStruct(0, N * 2).getInt(i)) + assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) + assert(i === result.getStruct(1, N * 2).getInt(i)) assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index b4aed23218357..0cfdc83573936 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -363,7 +363,7 @@ case class FileSourceScanExec( } val nextBatch = ctx.freshName("nextBatch") - ctx.addNewFunction(nextBatch, + val nextBatchFuncName = ctx.addNewFunction(nextBatch, s""" |private void $nextBatch() throws java.io.IOException { | long getBatchStart = System.nanoTime(); @@ -383,7 +383,7 @@ case class FileSourceScanExec( } s""" |if ($batch == null) { - | $nextBatch(); + | $nextBatchFuncName(); |} |while ($batch != null) { | int numRows = $batch.numRows(); @@ -393,7 +393,7 @@ case class FileSourceScanExec( | if (shouldStop()) return; | } | $batch = null; - | $nextBatch(); + | 
$nextBatchFuncName(); |} |$scanTimeMetric.add($scanTimeTotalNs / (1000 * 1000)); |$scanTimeTotalNs = 0; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala index cc576bbc4c802..9d3dbc2571610 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala @@ -141,7 +141,7 @@ case class SortExec( ctx.addMutableState("scala.collection.Iterator", sortedIterator, "") val addToSorter = ctx.freshName("addToSorter") - ctx.addNewFunction(addToSorter, + val addToSorterFuncName = ctx.addNewFunction(addToSorter, s""" | private void $addToSorter() throws java.io.IOException { | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} @@ -160,7 +160,7 @@ case class SortExec( s""" | if ($needToSort) { | long $spillSizeBefore = $metrics.memoryBytesSpilled(); - | $addToSorter(); + | $addToSorterFuncName(); | $sortedIterator = $sorterVariable.sort(); | $sortTime.add($sorterVariable.getSortTimeNanos() / 1000000); | $peakMemory.add($sorterVariable.getPeakMemoryUsage()); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 2ead8f6baae6b..f3931b8e47d15 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -339,6 +339,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co protected void processNext() throws java.io.IOException { ${code.trim} } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """.trim diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 4529ed067e565..1c6d4f8b18fa5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -209,7 +209,7 @@ case class HashAggregateExec( } val doAgg = ctx.freshName("doAggregateWithoutKey") - ctx.addNewFunction(doAgg, + val doAggFuncName = ctx.addNewFunction(doAgg, s""" | private void $doAgg() throws java.io.IOException { | // initialize aggregation buffer @@ -226,7 +226,7 @@ case class HashAggregateExec( | while (!$initAgg) { | $initAgg = true; | long $beforeAgg = System.nanoTime(); - | $doAgg(); + | $doAggFuncName(); | $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); | | // output the result @@ -590,7 +590,7 @@ case class HashAggregateExec( } else "" } - ctx.addNewFunction(doAgg, + val doAggFuncName = ctx.addNewFunction(doAgg, s""" ${generateGenerateCode} private void $doAgg() throws java.io.IOException { @@ -670,7 +670,7 @@ case class HashAggregateExec( if (!$initAgg) { $initAgg = true; long $beforeAgg = System.nanoTime(); - $doAgg(); + $doAggFuncName(); $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index b00223a86d4d4..6176e6d55f784 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -281,10 +281,8 @@ case class SampleExec( val samplerClass = classOf[PoissonSampler[UnsafeRow]].getName val initSampler = ctx.freshName("initSampler") ctx.copyResult = true - ctx.addMutableState(s"$samplerClass", sampler, - s"$initSampler();") - ctx.addNewFunction(initSampler, + val initSamplerFuncName = ctx.addNewFunction(initSampler, s""" | private void $initSampler() { | $sampler = new $samplerClass($upperBound - $lowerBound, false); @@ -299,6 +297,8 @@ case class SampleExec( | } """.stripMargin.trim) + ctx.addMutableState(s"$samplerClass", sampler, s"$initSamplerFuncName();") + val samplingCount = ctx.freshName("samplingCount") s""" | int $samplingCount = $sampler.sample(); @@ -370,7 +370,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) s"$number > $partitionEnd" } - ctx.addNewFunction("initRange", + val initRangeFuncName = ctx.addNewFunction("initRange", s""" | private void initRange(int idx) { | $BigInt index = $BigInt.valueOf(idx); @@ -409,7 +409,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) | // initialize Range | if (!$initTerm) { | $initTerm = true; - | initRange(partitionIndex); + | $initRangeFuncName(partitionIndex); | } | | while (!$overflow && $checkEnd) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index 14024d6c10558..f4566496fca5a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -128,9 +128,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera } else { val groupedAccessorsItr = initializeAccessors.grouped(numberOfStatementsThreshold) val groupedExtractorsItr = extractors.grouped(numberOfStatementsThreshold) - var groupedAccessorsLength = 0 - groupedAccessorsItr.zipWithIndex.foreach { case (body, i) => - groupedAccessorsLength += 1 + val accessorNames = groupedAccessorsItr.zipWithIndex.map { case (body, i) => val funcName = s"accessors$i" val funcCode = s""" |private void $funcName() { @@ -139,7 +137,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - groupedExtractorsItr.zipWithIndex.foreach { case (body, i) => + val extractorNames = groupedExtractorsItr.zipWithIndex.map { case (body, i) => val funcName = s"extractors$i" val funcCode = s""" |private void $funcName() { @@ -148,8 +146,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - ((0 to groupedAccessorsLength - 1).map { i => s"accessors$i();" }.mkString("\n"), - (0 to groupedAccessorsLength - 1).map { i => s"extractors$i();" }.mkString("\n")) + (accessorNames.map { accessorName => s"$accessorName();" }.mkString("\n"), + extractorNames.map { extractorName => s"$extractorName();" }.mkString("\n")) } val codeBody = s""" @@ -184,9 +182,9 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera ${ctx.declareMutableStates()} public SpecificColumnarIterator() { - this.nativeOrder = ByteOrder.nativeOrder(); - this.buffers = new byte[${columnTypes.length}][]; - this.mutableRow = new MutableUnsafeRow(rowWriter); + 
nativeOrder = ByteOrder.nativeOrder(); + buffers = new byte[${columnTypes.length}][]; + mutableRow = new MutableUnsafeRow(rowWriter); } public void initialize(Iterator input, DataType[] columnTypes, int[] columnIndexes) { @@ -224,6 +222,9 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera unsafeRow.setTotalSize(bufferHolder.totalSize()); return unsafeRow; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 89a9b38132732..f8e9a91592c0b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -446,7 +446,7 @@ case class SortMergeJoinExec( | } | return false; // unreachable |} - """.stripMargin) + """.stripMargin, inlineToOuterClass = true) (leftRow, matches) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 757fe2185d302..73a0f8735ed45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -75,7 +75,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { protected boolean stopEarly() { return $stopEarly; } - """) + """, inlineToOuterClass = true) val countTerm = ctx.freshName("count") ctx.addMutableState("int", countTerm, s"$countTerm = 0;") s""" From 198e3a036fbed373be3d26964e005b4f2fed2bb7 Mon Sep 17 00:00:00 2001 From: ALeksander Eskilson Date: Thu, 22 Jun 2017 13:25:45 +0800 Subject: [PATCH 1028/1204] [SPARK-18016][SQL][CATALYST][BRANCH-2.2] Code Generation: Constant Pool Limit - Class Splitting ## What changes were proposed in this pull request? This is a backport patch for Spark 2.2.x of the class splitting feature over excess generated code as was merged in #18075. ## How was this patch tested? The same test provided in #18075 is included in this patch. Author: ALeksander Eskilson Closes #18377 from bdrillard/class_splitting_2.2. 
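For orientation before the diff: the heart of the change is that `CodegenContext.addNewFunction` no longer appends every generated method to the single outer class. Once the class currently being filled grows past a byte-size threshold (a rough proxy for constant-pool entries, whose exact count cannot be known up front), new methods spill into a private nested class and the caller gets back an instance-qualified name to invoke. The sketch below is a stripped-down, standalone illustration of that bookkeeping only; `SplitRegistry` and its members are hypothetical names, not Spark's API.

```scala
import scala.collection.mutable

// Simplified sketch of the class-splitting bookkeeping; hypothetical names throughout.
class SplitRegistry(sizeThreshold: Int = 1600000) {
  private val outer = "OuterClass"
  // head = the class currently being filled: (className, instanceName)
  private val classes = mutable.ListBuffer((outer, ""))
  private val sizes = mutable.Map(outer -> 0)
  private val funcs = mutable.Map(outer -> mutable.ListBuffer.empty[String])

  /** Registers a generated function body; returns the name callers must use. */
  def addFunction(name: String, code: String): String = {
    val (cls, inst) =
      if (sizes(classes.head._1) > sizeThreshold) {
        // Current class is too big: open a fresh private nested class.
        val c = s"NestedClass${classes.size}"
        val i = s"nestedClassInstance${classes.size}"
        classes.prepend((c, i))
        sizes(c) = 0
        funcs(c) = mutable.ListBuffer.empty[String]
        (c, i)
      } else {
        classes.head
      }
    sizes(cls) += code.length
    funcs(cls) += code
    // Functions kept in the outer class are called by simple name; split-out ones
    // need the instance-qualified name.
    if (cls == outer) name else s"$inst.$name"
  }

  /** Nested class declarations plus the outer-class fields that instantiate them. */
  def declareNestedClasses(): String =
    classes.collect { case (c, i) if c != outer =>
      s"private class $c {\n  ${funcs(c).mkString("\n  ")}\n}\nprivate $c $i = new $c();"
    }.mkString("\n")
}
```

Call sites store the returned name and emit that instead of the bare function name, which mirrors how the diff below rewrites `SortExec`, `HashAggregateExec`, and the scan operators to use the value returned by `ctx.addNewFunction`.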
--- sql/catalyst/pom.xml | 7 + .../sql/catalyst/expressions/ScalaUDF.scala | 6 +- .../expressions/codegen/CodeGenerator.scala | 135 +++++++++++++++--- .../codegen/GenerateMutableProjection.scala | 17 ++- .../codegen/GenerateOrdering.scala | 3 + .../codegen/GeneratePredicate.scala | 3 + .../codegen/GenerateSafeProjection.scala | 9 +- .../codegen/GenerateUnsafeProjection.scala | 9 +- .../expressions/complexTypeCreator.scala | 6 +- .../expressions/conditionalExpressions.scala | 4 +- .../sql/catalyst/expressions/generators.scala | 6 +- .../expressions/objects/objects.scala | 2 +- .../codegen/GeneratedProjectionSuite.scala | 72 ++++++++-- .../sql/execution/ColumnarBatchScan.scala | 6 +- .../apache/spark/sql/execution/SortExec.scala | 4 +- .../sql/execution/WholeStageCodegenExec.scala | 3 + .../aggregate/HashAggregateExec.scala | 8 +- .../execution/basicPhysicalOperators.scala | 10 +- .../columnar/GenerateColumnAccessor.scala | 13 +- .../execution/joins/SortMergeJoinExec.scala | 2 +- .../apache/spark/sql/execution/limit.scala | 2 +- 21 files changed, 248 insertions(+), 79 deletions(-) diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5ecee28a1f0b8..d5027ff6ad23f 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -131,6 +131,13 @@ + + org.scalatest + scalatest-maven-plugin + + -Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m + + org.antlr antlr4-maven-plugin diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 228f4b756c8b4..5c68f9ffc691c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -988,7 +988,7 @@ case class ScalaUDF( val converterTerm = ctx.freshName("converter") val expressionIdx = ctx.references.size - 1 ctx.addMutableState(converterClassName, converterTerm, - s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" + + s"$converterTerm = ($converterClassName)$typeConvertersClassName" + s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" + s"references[$expressionIdx]).getChildren().apply($index))).dataType());") converterTerm @@ -1005,7 +1005,7 @@ case class ScalaUDF( // Generate codes used to convert the returned value of user-defined functions to Catalyst type val catalystConverterTerm = ctx.freshName("catalystConverter") ctx.addMutableState(converterClassName, catalystConverterTerm, - s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + + s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + s".createToCatalystConverter($scalaUDF.dataType());") val resultTerm = ctx.freshName("result") @@ -1019,7 +1019,7 @@ case class ScalaUDF( val funcTerm = ctx.freshName("udf") ctx.addMutableState(funcClassName, funcTerm, - s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") + s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") // codegen for children expressions val evals = children.map(_.genCode(ctx)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index f8da78b5f5e3e..4954cf8bc1177 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -113,7 +113,7 @@ class CodegenContext { val idx = references.length references += obj val clsName = Option(className).getOrElse(obj.getClass.getName) - addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];") + addMutableState(clsName, term, s"$term = ($clsName) references[$idx];") term } @@ -202,16 +202,6 @@ class CodegenContext { partitionInitializationStatements.mkString("\n") } - /** - * Holding all the functions those will be added into generated class. - */ - val addedFunctions: mutable.Map[String, String] = - mutable.Map.empty[String, String] - - def addNewFunction(funcName: String, funcCode: String): Unit = { - addedFunctions += ((funcName, funcCode)) - } - /** * Holds expressions that are equivalent. Used to perform subexpression elimination * during codegen. @@ -233,10 +223,118 @@ class CodegenContext { // The collection of sub-expression result resetting methods that need to be called on each row. val subexprFunctions = mutable.ArrayBuffer.empty[String] - def declareAddedFunctions(): String = { - addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString("\n") + private val outerClassName = "OuterClass" + + /** + * Holds the class and instance names to be generated, where `OuterClass` is a placeholder + * standing for whichever class is generated as the outermost class and which will contain any + * nested sub-classes. All other classes and instance names in this list will represent private, + * nested sub-classes. + */ + private val classes: mutable.ListBuffer[(String, String)] = + mutable.ListBuffer[(String, String)](outerClassName -> null) + + // A map holding the current size in bytes of each class to be generated. + private val classSize: mutable.Map[String, Int] = + mutable.Map[String, Int](outerClassName -> 0) + + // Nested maps holding function names and their code belonging to each class. + private val classFunctions: mutable.Map[String, mutable.Map[String, String]] = + mutable.Map(outerClassName -> mutable.Map.empty[String, String]) + + // Returns the size of the most recently added class. + private def currClassSize(): Int = classSize(classes.head._1) + + // Returns the class name and instance name for the most recently added class. + private def currClass(): (String, String) = classes.head + + // Adds a new class. Requires the class' name, and its instance name. + private def addClass(className: String, classInstance: String): Unit = { + classes.prepend(className -> classInstance) + classSize += className -> 0 + classFunctions += className -> mutable.Map.empty[String, String] } + /** + * Adds a function to the generated class. If the code for the `OuterClass` grows too large, the + * function will be inlined into a new private, nested class, and a instance-qualified name for + * the function will be returned. Otherwise, the function will be inined to the `OuterClass` the + * simple `funcName` will be returned. + * + * @param funcName the class-unqualified name of the function + * @param funcCode the body of the function + * @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This + * can be necessary when a function is declared outside of the context + * it is eventually referenced and a returned qualified function name + * cannot otherwise be accessed. 
+ * @return the name of the function, qualified by class if it will be inlined to a private, + * nested sub-class + */ + def addNewFunction( + funcName: String, + funcCode: String, + inlineToOuterClass: Boolean = false): String = { + // The number of named constants that can exist in the class is limited by the Constant Pool + // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a + // threshold of 1600k bytes to determine when a function should be inlined to a private, nested + // sub-class. + val (className, classInstance) = if (inlineToOuterClass) { + outerClassName -> "" + } else if (currClassSize > 1600000) { + val className = freshName("NestedClass") + val classInstance = freshName("nestedClassInstance") + + addClass(className, classInstance) + + className -> classInstance + } else { + currClass() + } + + classSize(className) += funcCode.length + classFunctions(className) += funcName -> funcCode + + if (className == outerClassName) { + funcName + } else { + + s"$classInstance.$funcName" + } + } + + /** + * Instantiates all nested, private sub-classes as objects to the `OuterClass` + */ + private[sql] def initNestedClasses(): String = { + // Nested, private sub-classes have no mutable state (though they do reference the outer class' + // mutable state), so we declare and initialize them inline to the OuterClass. + classes.filter(_._1 != outerClassName).map { + case (className, classInstance) => + s"private $className $classInstance = new $className();" + }.mkString("\n") + } + + /** + * Declares all function code that should be inlined to the `OuterClass`. + */ + private[sql] def declareAddedFunctions(): String = { + classFunctions(outerClassName).values.mkString("\n") + } + + /** + * Declares all nested, private sub-classes and the function code that should be inlined to them. + */ + private[sql] def declareNestedClasses(): String = { + classFunctions.filterKeys(_ != outerClassName).map { + case (className, functions) => + s""" + |private class $className { + | ${functions.values.mkString("\n")} + |} + """.stripMargin + } + }.mkString("\n") + final val JAVA_BOOLEAN = "boolean" final val JAVA_BYTE = "byte" final val JAVA_SHORT = "short" @@ -556,8 +654,7 @@ class CodegenContext { return 0; } """ - addNewFunction(compareFunc, funcCode) - s"this.$compareFunc($c1, $c2)" + s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" case schema: StructType => val comparisons = GenerateOrdering.genComparisons(this, schema) val compareFunc = freshName("compareStruct") @@ -573,8 +670,7 @@ class CodegenContext { return 0; } """ - addNewFunction(compareFunc, funcCode) - s"this.$compareFunc($c1, $c2)" + s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)" case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2) case _ => @@ -689,7 +785,6 @@ class CodegenContext { |} """.stripMargin addNewFunction(name, code) - name } foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) @@ -773,8 +868,6 @@ class CodegenContext { |} """.stripMargin - addNewFunction(fnName, fn) - // Add a state and a mapping of the common subexpressions that are associate with this // state. Adding this expression to subExprEliminationExprMap means it will call `fn` // when it is code generated. This decision should be a cost based one. 
@@ -792,7 +885,7 @@ class CodegenContext { addMutableState(javaType(expr.dataType), value, s"$value = ${defaultValue(expr.dataType)};") - subexprFunctions += s"$fnName($INPUT_ROW);" + subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" val state = SubExprEliminationState(isNull, value) e.foreach(subExprEliminationExprs.put(_, state)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 4d732445544a8..635766835029b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP if (e.nullable) { val isNull = s"isNull_$i" val value = s"value_$i" - ctx.addMutableState("boolean", isNull, s"this.$isNull = true;") + ctx.addMutableState("boolean", isNull, s"$isNull = true;") ctx.addMutableState(ctx.javaType(e.dataType), value, - s"this.$value = ${ctx.defaultValue(e.dataType)};") + s"$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - this.$isNull = ${ev.isNull}; - this.$value = ${ev.value}; + $isNull = ${ev.isNull}; + $value = ${ev.value}; """ } else { val value = s"value_$i" ctx.addMutableState(ctx.javaType(e.dataType), value, - s"this.$value = ${ctx.defaultValue(e.dataType)};") + s"$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - this.$value = ${ev.value}; + $value = ${ev.value}; """ } } @@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP val updates = validExpr.zip(index).map { case (e, i) => - val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i") + val ev = ExprCode("", s"isNull_$i", s"value_$i") ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable) } @@ -135,6 +135,9 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP $allUpdates return mutableRow; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index f7fc2d54a047b..a31943255b995 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -179,6 +179,9 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR $comparisons return 0; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index dcd1ed96a298e..b400783bb5e55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -72,6 +72,9 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] { ${eval.code} return 
!${eval.isNull} && ${eval.value}; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index b1cb6edefb852..f708aeff2b146 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val output = ctx.freshName("safeRow") val values = ctx.freshName("values") // These expressions could be split into multiple functions - ctx.addMutableState("Object[]", values, s"this.$values = null;") + ctx.addMutableState("Object[]", values, s"$values = null;") val rowClass = classOf[GenericInternalRow].getName @@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val allFields = ctx.splitExpressions(tmp, fieldWriters) val code = s""" final InternalRow $tmp = $input; - this.$values = new Object[${schema.length}]; + $values = new Object[${schema.length}]; $allFields final InternalRow $output = new $rowClass($values); - this.$values = null; + $values = null; """ ExprCode(code, "false", output) @@ -184,6 +184,9 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] $allExpressions return mutableRow; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index b358102d914bd..febfe3124f2bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val rowWriterClass = classOf[UnsafeRowWriter].getName val rowWriter = ctx.freshName("rowWriter") ctx.addMutableState(rowWriterClass, rowWriter, - s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") + s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") val resetWriter = if (isTopLevel) { // For top level row writer, it always writes to the beginning of the global buffer holder, @@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val arrayWriterClass = classOf[UnsafeArrayWriter].getName val arrayWriter = ctx.freshName("arrayWriter") ctx.addMutableState(arrayWriterClass, arrayWriter, - s"this.$arrayWriter = new $arrayWriterClass();") + s"$arrayWriter = new $arrayWriterClass();") val numElements = ctx.freshName("numElements") val index = ctx.freshName("index") val element = ctx.freshName("element") @@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val holder = ctx.freshName("holder") val holderClass = classOf[BufferHolder].getName ctx.addMutableState(holderClass, holder, - s"this.$holder = new $holderClass($result, ${numVarLenFields * 32});") + s"$holder = new $holderClass($result, 
${numVarLenFields * 32});") val resetBufferHolder = if (numVarLenFields == 0) { "" @@ -402,6 +402,9 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro ${eval.code.trim} return ${eval.value}; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index b6675a84ece48..98c4cbee38dee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -93,7 +93,7 @@ private [sql] object GenArrayData { if (!ctx.isPrimitiveType(elementType)) { val genericArrayClass = classOf[GenericArrayData].getName ctx.addMutableState("Object[]", arrayName, - s"this.$arrayName = new Object[${numElements}];") + s"$arrayName = new Object[${numElements}];") val assignments = elementsCode.zipWithIndex.map { case (eval, i) => val isNullAssignment = if (!isMapKey) { @@ -340,7 +340,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericInternalRow].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"this.$values = null;") + ctx.addMutableState("Object[]", values, s"$values = null;") ev.copy(code = s""" $values = new Object[${valExprs.size}];""" + @@ -357,7 +357,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc }) + s""" final InternalRow ${ev.value} = new $rowClass($values); - this.$values = null; + $values = null; """, isNull = "false") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index ee365fe636614..ae8efb673f91c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -131,8 +131,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi | $globalValue = ${ev.value}; |} """.stripMargin - ctx.addNewFunction(funcName, funcBody) - (funcName, globalIsNull, globalValue) + val fullFuncName = ctx.addNewFunction(funcName, funcBody) + (fullFuncName, globalIsNull, globalValue) } override def toString: String = s"if ($predicate) $trueValue else $falseValue" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index e84796f2edad0..baa5ba68dcb30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -181,7 +181,7 @@ case class Stack(children: Seq[Expression]) extends Generator { override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Rows - we write these into an array. 
val rowData = ctx.freshName("rows") - ctx.addMutableState("InternalRow[]", rowData, s"this.$rowData = new InternalRow[$numRows];") + ctx.addMutableState("InternalRow[]", rowData, s"$rowData = new InternalRow[$numRows];") val values = children.tail val dataTypes = values.take(numFields).map(_.dataType) val code = ctx.splitExpressions(ctx.INPUT_ROW, Seq.tabulate(numRows) { row => @@ -190,7 +190,7 @@ case class Stack(children: Seq[Expression]) extends Generator { if (index < values.length) values(index) else Literal(null, dataTypes(col)) } val eval = CreateStruct(fields).genCode(ctx) - s"${eval.code}\nthis.$rowData[$row] = ${eval.value};" + s"${eval.code}\n$rowData[$row] = ${eval.value};" }) // Create the collection. @@ -198,7 +198,7 @@ case class Stack(children: Seq[Expression]) extends Generator { ctx.addMutableState( s"$wrapperClass", ev.value, - s"this.${ev.value} = $wrapperClass$$.MODULE$$.make(this.$rowData);") + s"${ev.value} = $wrapperClass$$.MODULE$$.make($rowData);") ev.copy(code = code, isNull = "false") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 1a202ecf745c9..2bd752c82e6c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -981,7 +981,7 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp val code = s""" ${instanceGen.code} - this.${javaBeanInstance} = ${instanceGen.value}; + ${javaBeanInstance} = ${instanceGen.value}; if (!${instanceGen.isNull}) { $initializeCode } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index b69b74b4240bd..d7ba57a697b08 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -33,10 +33,10 @@ class GeneratedProjectionSuite extends SparkFunSuite { test("generated projections on wider table") { val N = 1000 - val wideRow1 = new GenericInternalRow((1 to N).toArray[Any]) + val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) val wideRow2 = new GenericInternalRow( - (1 to N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) + (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) val schema2 = StructType((1 to N).map(i => StructField("", StringType))) val joined = new JoinedRow(wideRow1, wideRow2) val joinedSchema = StructType(schema1 ++ schema2) @@ -48,12 +48,12 @@ class GeneratedProjectionSuite extends SparkFunSuite { val unsafeProj = UnsafeProjection.create(nestedSchema) val unsafe: UnsafeRow = unsafeProj(nested) (0 until N).foreach { i => - val s = UTF8String.fromString((i + 1).toString) - assert(i + 1 === unsafe.getInt(i + 2)) + val s = UTF8String.fromString(i.toString) + assert(i === unsafe.getInt(i + 2)) assert(s === unsafe.getUTF8String(i + 2 + N)) - assert(i + 1 === unsafe.getStruct(0, N * 2).getInt(i)) + assert(i === unsafe.getStruct(0, N * 2).getInt(i)) assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i + 1 === 
unsafe.getStruct(1, N * 2).getInt(i)) + assert(i === unsafe.getStruct(1, N * 2).getInt(i)) assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) } @@ -62,13 +62,63 @@ class GeneratedProjectionSuite extends SparkFunSuite { val result = safeProj(unsafe) // Can't compare GenericInternalRow with JoinedRow directly (0 until N).foreach { i => - val r = i + 1 - val s = UTF8String.fromString((i + 1).toString) - assert(r === result.getInt(i + 2)) + val s = UTF8String.fromString(i.toString) + assert(i === result.getInt(i + 2)) assert(s === result.getUTF8String(i + 2 + N)) - assert(r === result.getStruct(0, N * 2).getInt(i)) + assert(i === result.getStruct(0, N * 2).getInt(i)) assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) - assert(r === result.getStruct(1, N * 2).getInt(i)) + assert(i === result.getStruct(1, N * 2).getInt(i)) + assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) + } + + // test generated MutableProjection + val exprs = nestedSchema.fields.zipWithIndex.map { case (f, i) => + BoundReference(i, f.dataType, true) + } + val mutableProj = GenerateMutableProjection.generate(exprs) + val row1 = mutableProj(result) + assert(result === row1) + val row2 = mutableProj(result) + assert(result === row2) + } + + test("generated projections on wider table requiring class-splitting") { + val N = 4000 + val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) + val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) + val wideRow2 = new GenericInternalRow( + (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) + val schema2 = StructType((1 to N).map(i => StructField("", StringType))) + val joined = new JoinedRow(wideRow1, wideRow2) + val joinedSchema = StructType(schema1 ++ schema2) + val nested = new JoinedRow(InternalRow(joined, joined), joined) + val nestedSchema = StructType( + Seq(StructField("", joinedSchema), StructField("", joinedSchema)) ++ joinedSchema) + + // test generated UnsafeProjection + val unsafeProj = UnsafeProjection.create(nestedSchema) + val unsafe: UnsafeRow = unsafeProj(nested) + (0 until N).foreach { i => + val s = UTF8String.fromString(i.toString) + assert(i === unsafe.getInt(i + 2)) + assert(s === unsafe.getUTF8String(i + 2 + N)) + assert(i === unsafe.getStruct(0, N * 2).getInt(i)) + assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) + assert(i === unsafe.getStruct(1, N * 2).getInt(i)) + assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) + } + + // test generated SafeProjection + val safeProj = FromUnsafeProjection(nestedSchema) + val result = safeProj(unsafe) + // Can't compare GenericInternalRow with JoinedRow directly + (0 until N).foreach { i => + val s = UTF8String.fromString(i.toString) + assert(i === result.getInt(i + 2)) + assert(s === result.getUTF8String(i + 2 + N)) + assert(i === result.getStruct(0, N * 2).getInt(i)) + assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) + assert(i === result.getStruct(1, N * 2).getInt(i)) assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala index e86116680a57a..74a47da2deef2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala @@ -93,7 +93,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { } val nextBatch = 
ctx.freshName("nextBatch") - ctx.addNewFunction(nextBatch, + val nextBatchFuncName = ctx.addNewFunction(nextBatch, s""" |private void $nextBatch() throws java.io.IOException { | long getBatchStart = System.nanoTime(); @@ -121,7 +121,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { } s""" |if ($batch == null) { - | $nextBatch(); + | $nextBatchFuncName(); |} |while ($batch != null) { | int $numRows = $batch.numRows(); @@ -133,7 +133,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { | } | $idx = $numRows; | $batch = null; - | $nextBatch(); + | $nextBatchFuncName(); |} |$scanTimeMetric.add($scanTimeTotalNs / (1000 * 1000)); |$scanTimeTotalNs = 0; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala index f98ae82574d20..ff71fd4dc7bb7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala @@ -141,7 +141,7 @@ case class SortExec( ctx.addMutableState("scala.collection.Iterator", sortedIterator, "") val addToSorter = ctx.freshName("addToSorter") - ctx.addNewFunction(addToSorter, + val addToSorterFuncName = ctx.addNewFunction(addToSorter, s""" | private void $addToSorter() throws java.io.IOException { | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} @@ -160,7 +160,7 @@ case class SortExec( s""" | if ($needToSort) { | long $spillSizeBefore = $metrics.memoryBytesSpilled(); - | $addToSorter(); + | $addToSorterFuncName(); | $sortedIterator = $sorterVariable.sort(); | $sortTime.add($sorterVariable.getSortTimeNanos() / 1000000); | $peakMemory.add($sorterVariable.getPeakMemoryUsage()); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index c1e1a631c677e..c7e9d25bd2cc0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -357,6 +357,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co protected void processNext() throws java.io.IOException { ${code.trim} } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} } """.trim diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 68c8e6ce62cbb..bf7fa07765b9a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -209,7 +209,7 @@ case class HashAggregateExec( } val doAgg = ctx.freshName("doAggregateWithoutKey") - ctx.addNewFunction(doAgg, + val doAggFuncName = ctx.addNewFunction(doAgg, s""" | private void $doAgg() throws java.io.IOException { | // initialize aggregation buffer @@ -226,7 +226,7 @@ case class HashAggregateExec( | while (!$initAgg) { | $initAgg = true; | long $beforeAgg = System.nanoTime(); - | $doAgg(); + | $doAggFuncName(); | $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); | | // output the result @@ -592,7 +592,7 @@ case class HashAggregateExec( } else "" } - ctx.addNewFunction(doAgg, + val doAggFuncName = ctx.addNewFunction(doAgg, s""" ${generateGenerateCode} private void $doAgg() throws 
java.io.IOException { @@ -672,7 +672,7 @@ case class HashAggregateExec( if (!$initAgg) { $initAgg = true; long $beforeAgg = System.nanoTime(); - $doAgg(); + $doAggFuncName(); $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index bd7a5c5d914c1..bb24489ade1b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -281,10 +281,8 @@ case class SampleExec( val samplerClass = classOf[PoissonSampler[UnsafeRow]].getName val initSampler = ctx.freshName("initSampler") ctx.copyResult = true - ctx.addMutableState(s"$samplerClass", sampler, - s"$initSampler();") - ctx.addNewFunction(initSampler, + val initSamplerFuncName = ctx.addNewFunction(initSampler, s""" | private void $initSampler() { | $sampler = new $samplerClass($upperBound - $lowerBound, false); @@ -299,6 +297,8 @@ case class SampleExec( | } """.stripMargin.trim) + ctx.addMutableState(s"$samplerClass", sampler, s"$initSamplerFuncName();") + val samplingCount = ctx.freshName("samplingCount") s""" | int $samplingCount = $sampler.sample(); @@ -394,7 +394,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) // The default size of a batch, which must be positive integer val batchSize = 1000 - ctx.addNewFunction("initRange", + val initRangeFuncName = ctx.addNewFunction("initRange", s""" | private void initRange(int idx) { | $BigInt index = $BigInt.valueOf(idx); @@ -451,7 +451,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) | // initialize Range | if (!$initTerm) { | $initTerm = true; - | initRange(partitionIndex); + | $initRangeFuncName(partitionIndex); | } | | while (true) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index 14024d6c10558..a66e8e2b46e3d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -128,9 +128,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera } else { val groupedAccessorsItr = initializeAccessors.grouped(numberOfStatementsThreshold) val groupedExtractorsItr = extractors.grouped(numberOfStatementsThreshold) - var groupedAccessorsLength = 0 - groupedAccessorsItr.zipWithIndex.foreach { case (body, i) => - groupedAccessorsLength += 1 + val accessorNames = groupedAccessorsItr.zipWithIndex.map { case (body, i) => val funcName = s"accessors$i" val funcCode = s""" |private void $funcName() { @@ -139,7 +137,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - groupedExtractorsItr.zipWithIndex.foreach { case (body, i) => + val extractorNames = groupedExtractorsItr.zipWithIndex.map { case (body, i) => val funcName = s"extractors$i" val funcCode = s""" |private void $funcName() { @@ -148,8 +146,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - ((0 to groupedAccessorsLength - 1).map { i => s"accessors$i();" 
}.mkString("\n"), - (0 to groupedAccessorsLength - 1).map { i => s"extractors$i();" }.mkString("\n")) + (accessorNames.map { accessorName => s"$accessorName();" }.mkString("\n"), + extractorNames.map { extractorName => s"$extractorName();" }.mkString("\n")) } val codeBody = s""" @@ -224,6 +222,9 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera unsafeRow.setTotalSize(bufferHolder.totalSize()); return unsafeRow; } + + ${ctx.initNestedClasses()} + ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 26fb6103953fc..8445c26eeee58 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -478,7 +478,7 @@ case class SortMergeJoinExec( | } | return false; // unreachable |} - """.stripMargin) + """.stripMargin, inlineToOuterClass = true) (leftRow, matches) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 757fe2185d302..73a0f8735ed45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -75,7 +75,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { protected boolean stopEarly() { return $stopEarly; } - """) + """, inlineToOuterClass = true) val countTerm = ctx.freshName("count") ctx.addMutableState("int", countTerm, s"$countTerm = 0;") s""" From 6ef7a5bd32a483ea1bdac22fbd2403cdefd71bff Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Wed, 21 Jun 2017 23:43:21 -0700 Subject: [PATCH 1029/1204] [SPARK-21167][SS] Decode the path generated by File sink to handle special characters ## What changes were proposed in this pull request? Decode the path generated by File sink to handle special characters. ## How was this patch tested? The added unit test. Author: Shixiong Zhu Closes #18381 from zsxwing/SPARK-21167. 
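The crux of the fix, before the diff: `FileStreamSinkLog` persists file paths URI-encoded, so a partition value containing a space is stored with `%20`, and rebuilding a `FileStatus` has to decode that escape before handing the path back to the filesystem. A minimal sketch of the difference, with a made-up path (assumes `hadoop-common` on the classpath):

```scala
import java.net.URI
import org.apache.hadoop.fs.Path

// The sink log stores the path URI-encoded, so a space in a partition value becomes "%20".
val stored = "file:/tmp/out/value=hello%20world/part-00000.parquet"

// Built straight from the string, the Path keeps "%20" as three literal characters,
// which no longer matches the directory the sink actually wrote.
println(new Path(stored))

// Going through java.net.URI first decodes the escape back to the on-disk name,
// which is what the patched SinkFileStatus.toFileStatus now does.
println(new Path(new URI(stored)))   // file:/tmp/out/value=hello world/part-00000.parquet
```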
(cherry picked from commit d66b143eec7f604595089f72d8786edbdcd74282) Signed-off-by: Shixiong Zhu --- .../streaming/FileStreamSinkLog.scala | 5 +++- .../sql/streaming/FileStreamSinkSuite.scala | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala index 8d718b2164d22..c9939ac1db746 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.streaming +import java.net.URI + import org.apache.hadoop.fs.{FileStatus, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization @@ -47,7 +49,8 @@ case class SinkFileStatus( action: String) { def toFileStatus: FileStatus = { - new FileStatus(size, isDir, blockReplication, blockSize, modificationTime, new Path(path)) + new FileStatus( + size, isDir, blockReplication, blockSize, modificationTime, new Path(new URI(path))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 1a2d3a13f3a4a..bb6a27803bb20 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala @@ -64,6 +64,35 @@ class FileStreamSinkSuite extends StreamTest { } } + test("SPARK-21167: encode and decode path correctly") { + val inputData = MemoryStream[String] + val ds = inputData.toDS() + + val outputDir = Utils.createTempDir(namePrefix = "stream.output").getCanonicalPath + val checkpointDir = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath + + val query = ds.map(s => (s, s.length)) + .toDF("value", "len") + .writeStream + .partitionBy("value") + .option("checkpointLocation", checkpointDir) + .format("parquet") + .start(outputDir) + + try { + // The output is partitoned by "value", so the value will appear in the file path. + // This is to test if we handle spaces in the path correctly. + inputData.addData("hello world") + failAfter(streamingTimeout) { + query.processAllAvailable() + } + val outputDf = spark.read.parquet(outputDir) + checkDatasetUnorderly(outputDf.as[(Int, String)], ("hello world".length, "hello world")) + } finally { + query.stop() + } + } + test("partitioned writing and batch reading") { val inputData = MemoryStream[Int] val ds = inputData.toDS() From 1a98d5d0a907e3eee5cd541934fe5f033a13535d Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Wed, 21 Jun 2017 23:43:21 -0700 Subject: [PATCH 1030/1204] [SPARK-21167][SS] Decode the path generated by File sink to handle special characters ## What changes were proposed in this pull request? Decode the path generated by File sink to handle special characters. ## How was this patch tested? The added unit test. Author: Shixiong Zhu Closes #18381 from zsxwing/SPARK-21167. 
(cherry picked from commit d66b143eec7f604595089f72d8786edbdcd74282) Signed-off-by: Shixiong Zhu --- .../streaming/FileStreamSinkLog.scala | 5 +++- .../sql/streaming/FileStreamSinkSuite.scala | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala index 8d718b2164d22..c9939ac1db746 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.streaming +import java.net.URI + import org.apache.hadoop.fs.{FileStatus, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization @@ -47,7 +49,8 @@ case class SinkFileStatus( action: String) { def toFileStatus: FileStatus = { - new FileStatus(size, isDir, blockReplication, blockSize, modificationTime, new Path(path)) + new FileStatus( + size, isDir, blockReplication, blockSize, modificationTime, new Path(new URI(path))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 688829ff927ac..821bb19eb1ef8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala @@ -60,6 +60,35 @@ class FileStreamSinkSuite extends StreamTest { } } + test("SPARK-21167: encode and decode path correctly") { + val inputData = MemoryStream[String] + val ds = inputData.toDS() + + val outputDir = Utils.createTempDir(namePrefix = "stream.output").getCanonicalPath + val checkpointDir = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath + + val query = ds.map(s => (s, s.length)) + .toDF("value", "len") + .writeStream + .partitionBy("value") + .option("checkpointLocation", checkpointDir) + .format("parquet") + .start(outputDir) + + try { + // The output is partitoned by "value", so the value will appear in the file path. + // This is to test if we handle spaces in the path correctly. + inputData.addData("hello world") + failAfter(streamingTimeout) { + query.processAllAvailable() + } + val outputDf = spark.read.parquet(outputDir) + checkDatasetUnorderly(outputDf.as[(Int, String)], ("hello world".length, "hello world")) + } finally { + query.stop() + } + } + test("partitioned writing and batch reading") { val inputData = MemoryStream[Int] val ds = inputData.toDS() From d6257347122abd86f8c36e54a446e05c95615d69 Mon Sep 17 00:00:00 2001 From: actuaryzhang Date: Thu, 22 Jun 2017 10:12:33 +0100 Subject: [PATCH 1031/1204] [SQL][DOC] Fix documentation of lpad ## What changes were proposed in this pull request? Fix incomplete documentation for `lpad`. Author: actuaryzhang Closes #18367 from actuaryzhang/SQLDoc. 
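For completeness, the behavior the corrected wording describes, as a quick spark-shell sketch (assumes a session where `spark` is in scope):

```scala
import org.apache.spark.sql.functions.{lit, lpad, rpad}

spark.range(1).select(
  lpad(lit("hi"), 5, "."),    // "...hi"  -- padded on the left up to len
  lpad(lit("spark"), 3, "."), // "spa"    -- longer than len, so shortened to len characters
  rpad(lit("hi"), 5, ".")     // "hi..."  -- the same rule applies to right-padding
).show()
```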
(cherry picked from commit 97b307c87c0f262ea3e020bf3d72383deef76619) Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/functions.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 74e9560b342c0..067b6d528d3f2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2292,7 +2292,8 @@ object functions { } /** - * Left-pad the string column with + * Left-pad the string column with pad to a length of len. If the string column is longer + * than len, the return value is shortened to len characters. * * @group string_funcs * @since 1.5.0 @@ -2350,7 +2351,8 @@ object functions { def unbase64(e: Column): Column = withExpr { UnBase64(e.expr) } /** - * Right-padded with pad to a length of len. + * Right-pad the string column with pad to a length of len. If the string column is longer + * than len, the return value is shortened to len characters. * * @group string_funcs * @since 1.5.0 From b99c0e9d1cb42666676462bade1609d42c34688d Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 23 Jun 2017 08:37:24 +0800 Subject: [PATCH 1032/1204] Revert "[SPARK-18016][SQL][CATALYST][BRANCH-2.2] Code Generation: Constant Pool Limit - Class Splitting" This reverts commit 198e3a036fbed373be3d26964e005b4f2fed2bb7. --- sql/catalyst/pom.xml | 7 - .../sql/catalyst/expressions/ScalaUDF.scala | 6 +- .../expressions/codegen/CodeGenerator.scala | 135 +++--------------- .../codegen/GenerateMutableProjection.scala | 17 +-- .../codegen/GenerateOrdering.scala | 3 - .../codegen/GeneratePredicate.scala | 3 - .../codegen/GenerateSafeProjection.scala | 9 +- .../codegen/GenerateUnsafeProjection.scala | 9 +- .../expressions/complexTypeCreator.scala | 6 +- .../expressions/conditionalExpressions.scala | 4 +- .../sql/catalyst/expressions/generators.scala | 6 +- .../expressions/objects/objects.scala | 2 +- .../codegen/GeneratedProjectionSuite.scala | 72 ++-------- .../sql/execution/ColumnarBatchScan.scala | 6 +- .../apache/spark/sql/execution/SortExec.scala | 4 +- .../sql/execution/WholeStageCodegenExec.scala | 3 - .../aggregate/HashAggregateExec.scala | 8 +- .../execution/basicPhysicalOperators.scala | 10 +- .../columnar/GenerateColumnAccessor.scala | 13 +- .../execution/joins/SortMergeJoinExec.scala | 2 +- .../apache/spark/sql/execution/limit.scala | 2 +- 21 files changed, 79 insertions(+), 248 deletions(-) diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index d5027ff6ad23f..5ecee28a1f0b8 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -131,13 +131,6 @@ - - org.scalatest - scalatest-maven-plugin - - -Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m - - org.antlr antlr4-maven-plugin diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 5c68f9ffc691c..228f4b756c8b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -988,7 +988,7 @@ case class ScalaUDF( val converterTerm = ctx.freshName("converter") val expressionIdx = ctx.references.size - 1 ctx.addMutableState(converterClassName, converterTerm, - s"$converterTerm = 
($converterClassName)$typeConvertersClassName" + + s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" + s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" + s"references[$expressionIdx]).getChildren().apply($index))).dataType());") converterTerm @@ -1005,7 +1005,7 @@ case class ScalaUDF( // Generate codes used to convert the returned value of user-defined functions to Catalyst type val catalystConverterTerm = ctx.freshName("catalystConverter") ctx.addMutableState(converterClassName, catalystConverterTerm, - s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + + s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + s".createToCatalystConverter($scalaUDF.dataType());") val resultTerm = ctx.freshName("result") @@ -1019,7 +1019,7 @@ case class ScalaUDF( val funcTerm = ctx.freshName("udf") ctx.addMutableState(funcClassName, funcTerm, - s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") + s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") // codegen for children expressions val evals = children.map(_.genCode(ctx)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 4954cf8bc1177..f8da78b5f5e3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -113,7 +113,7 @@ class CodegenContext { val idx = references.length references += obj val clsName = Option(className).getOrElse(obj.getClass.getName) - addMutableState(clsName, term, s"$term = ($clsName) references[$idx];") + addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];") term } @@ -202,6 +202,16 @@ class CodegenContext { partitionInitializationStatements.mkString("\n") } + /** + * Holding all the functions those will be added into generated class. + */ + val addedFunctions: mutable.Map[String, String] = + mutable.Map.empty[String, String] + + def addNewFunction(funcName: String, funcCode: String): Unit = { + addedFunctions += ((funcName, funcCode)) + } + /** * Holds expressions that are equivalent. Used to perform subexpression elimination * during codegen. @@ -223,118 +233,10 @@ class CodegenContext { // The collection of sub-expression result resetting methods that need to be called on each row. val subexprFunctions = mutable.ArrayBuffer.empty[String] - private val outerClassName = "OuterClass" - - /** - * Holds the class and instance names to be generated, where `OuterClass` is a placeholder - * standing for whichever class is generated as the outermost class and which will contain any - * nested sub-classes. All other classes and instance names in this list will represent private, - * nested sub-classes. - */ - private val classes: mutable.ListBuffer[(String, String)] = - mutable.ListBuffer[(String, String)](outerClassName -> null) - - // A map holding the current size in bytes of each class to be generated. - private val classSize: mutable.Map[String, Int] = - mutable.Map[String, Int](outerClassName -> 0) - - // Nested maps holding function names and their code belonging to each class. 
- private val classFunctions: mutable.Map[String, mutable.Map[String, String]] = - mutable.Map(outerClassName -> mutable.Map.empty[String, String]) - - // Returns the size of the most recently added class. - private def currClassSize(): Int = classSize(classes.head._1) - - // Returns the class name and instance name for the most recently added class. - private def currClass(): (String, String) = classes.head - - // Adds a new class. Requires the class' name, and its instance name. - private def addClass(className: String, classInstance: String): Unit = { - classes.prepend(className -> classInstance) - classSize += className -> 0 - classFunctions += className -> mutable.Map.empty[String, String] + def declareAddedFunctions(): String = { + addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString("\n") } - /** - * Adds a function to the generated class. If the code for the `OuterClass` grows too large, the - * function will be inlined into a new private, nested class, and a instance-qualified name for - * the function will be returned. Otherwise, the function will be inined to the `OuterClass` the - * simple `funcName` will be returned. - * - * @param funcName the class-unqualified name of the function - * @param funcCode the body of the function - * @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This - * can be necessary when a function is declared outside of the context - * it is eventually referenced and a returned qualified function name - * cannot otherwise be accessed. - * @return the name of the function, qualified by class if it will be inlined to a private, - * nested sub-class - */ - def addNewFunction( - funcName: String, - funcCode: String, - inlineToOuterClass: Boolean = false): String = { - // The number of named constants that can exist in the class is limited by the Constant Pool - // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a - // threshold of 1600k bytes to determine when a function should be inlined to a private, nested - // sub-class. - val (className, classInstance) = if (inlineToOuterClass) { - outerClassName -> "" - } else if (currClassSize > 1600000) { - val className = freshName("NestedClass") - val classInstance = freshName("nestedClassInstance") - - addClass(className, classInstance) - - className -> classInstance - } else { - currClass() - } - - classSize(className) += funcCode.length - classFunctions(className) += funcName -> funcCode - - if (className == outerClassName) { - funcName - } else { - - s"$classInstance.$funcName" - } - } - - /** - * Instantiates all nested, private sub-classes as objects to the `OuterClass` - */ - private[sql] def initNestedClasses(): String = { - // Nested, private sub-classes have no mutable state (though they do reference the outer class' - // mutable state), so we declare and initialize them inline to the OuterClass. - classes.filter(_._1 != outerClassName).map { - case (className, classInstance) => - s"private $className $classInstance = new $className();" - }.mkString("\n") - } - - /** - * Declares all function code that should be inlined to the `OuterClass`. - */ - private[sql] def declareAddedFunctions(): String = { - classFunctions(outerClassName).values.mkString("\n") - } - - /** - * Declares all nested, private sub-classes and the function code that should be inlined to them. 
- */ - private[sql] def declareNestedClasses(): String = { - classFunctions.filterKeys(_ != outerClassName).map { - case (className, functions) => - s""" - |private class $className { - | ${functions.values.mkString("\n")} - |} - """.stripMargin - } - }.mkString("\n") - final val JAVA_BOOLEAN = "boolean" final val JAVA_BYTE = "byte" final val JAVA_SHORT = "short" @@ -654,7 +556,8 @@ class CodegenContext { return 0; } """ - s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" + addNewFunction(compareFunc, funcCode) + s"this.$compareFunc($c1, $c2)" case schema: StructType => val comparisons = GenerateOrdering.genComparisons(this, schema) val compareFunc = freshName("compareStruct") @@ -670,7 +573,8 @@ class CodegenContext { return 0; } """ - s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" + addNewFunction(compareFunc, funcCode) + s"this.$compareFunc($c1, $c2)" case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)" case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2) case _ => @@ -785,6 +689,7 @@ class CodegenContext { |} """.stripMargin addNewFunction(name, code) + name } foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) @@ -868,6 +773,8 @@ class CodegenContext { |} """.stripMargin + addNewFunction(fnName, fn) + // Add a state and a mapping of the common subexpressions that are associate with this // state. Adding this expression to subExprEliminationExprMap means it will call `fn` // when it is code generated. This decision should be a cost based one. @@ -885,7 +792,7 @@ class CodegenContext { addMutableState(javaType(expr.dataType), value, s"$value = ${defaultValue(expr.dataType)};") - subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" + subexprFunctions += s"$fnName($INPUT_ROW);" val state = SubExprEliminationState(isNull, value) e.foreach(subExprEliminationExprs.put(_, state)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 635766835029b..4d732445544a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP if (e.nullable) { val isNull = s"isNull_$i" val value = s"value_$i" - ctx.addMutableState("boolean", isNull, s"$isNull = true;") + ctx.addMutableState("boolean", isNull, s"this.$isNull = true;") ctx.addMutableState(ctx.javaType(e.dataType), value, - s"$value = ${ctx.defaultValue(e.dataType)};") + s"this.$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - $isNull = ${ev.isNull}; - $value = ${ev.value}; + this.$isNull = ${ev.isNull}; + this.$value = ${ev.value}; """ } else { val value = s"value_$i" ctx.addMutableState(ctx.javaType(e.dataType), value, - s"$value = ${ctx.defaultValue(e.dataType)};") + s"this.$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - $value = ${ev.value}; + this.$value = ${ev.value}; """ } } @@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP val updates = validExpr.zip(index).map { case (e, i) => - val ev = ExprCode("", s"isNull_$i", s"value_$i") + val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i") ctx.updateColumn("mutableRow", e.dataType, 
i, ev, e.nullable) } @@ -135,9 +135,6 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP $allUpdates return mutableRow; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index a31943255b995..f7fc2d54a047b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -179,9 +179,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR $comparisons return 0; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index b400783bb5e55..dcd1ed96a298e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -72,9 +72,6 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] { ${eval.code} return !${eval.isNull} && ${eval.value}; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index f708aeff2b146..b1cb6edefb852 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val output = ctx.freshName("safeRow") val values = ctx.freshName("values") // These expressions could be split into multiple functions - ctx.addMutableState("Object[]", values, s"$values = null;") + ctx.addMutableState("Object[]", values, s"this.$values = null;") val rowClass = classOf[GenericInternalRow].getName @@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val allFields = ctx.splitExpressions(tmp, fieldWriters) val code = s""" final InternalRow $tmp = $input; - $values = new Object[${schema.length}]; + this.$values = new Object[${schema.length}]; $allFields final InternalRow $output = new $rowClass($values); - $values = null; + this.$values = null; """ ExprCode(code, "false", output) @@ -184,9 +184,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] $allExpressions return mutableRow; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index febfe3124f2bd..b358102d914bd 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val rowWriterClass = classOf[UnsafeRowWriter].getName val rowWriter = ctx.freshName("rowWriter") ctx.addMutableState(rowWriterClass, rowWriter, - s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") + s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") val resetWriter = if (isTopLevel) { // For top level row writer, it always writes to the beginning of the global buffer holder, @@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val arrayWriterClass = classOf[UnsafeArrayWriter].getName val arrayWriter = ctx.freshName("arrayWriter") ctx.addMutableState(arrayWriterClass, arrayWriter, - s"$arrayWriter = new $arrayWriterClass();") + s"this.$arrayWriter = new $arrayWriterClass();") val numElements = ctx.freshName("numElements") val index = ctx.freshName("index") val element = ctx.freshName("element") @@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val holder = ctx.freshName("holder") val holderClass = classOf[BufferHolder].getName ctx.addMutableState(holderClass, holder, - s"$holder = new $holderClass($result, ${numVarLenFields * 32});") + s"this.$holder = new $holderClass($result, ${numVarLenFields * 32});") val resetBufferHolder = if (numVarLenFields == 0) { "" @@ -402,9 +402,6 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro ${eval.code.trim} return ${eval.value}; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 98c4cbee38dee..b6675a84ece48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -93,7 +93,7 @@ private [sql] object GenArrayData { if (!ctx.isPrimitiveType(elementType)) { val genericArrayClass = classOf[GenericArrayData].getName ctx.addMutableState("Object[]", arrayName, - s"$arrayName = new Object[${numElements}];") + s"this.$arrayName = new Object[${numElements}];") val assignments = elementsCode.zipWithIndex.map { case (eval, i) => val isNullAssignment = if (!isMapKey) { @@ -340,7 +340,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericInternalRow].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"$values = null;") + ctx.addMutableState("Object[]", values, s"this.$values = null;") ev.copy(code = s""" $values = new Object[${valExprs.size}];""" + @@ -357,7 +357,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc }) + s""" final InternalRow ${ev.value} = new $rowClass($values); - $values = null; + this.$values = null; """, isNull = "false") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index ae8efb673f91c..ee365fe636614 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -131,8 +131,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi | $globalValue = ${ev.value}; |} """.stripMargin - val fullFuncName = ctx.addNewFunction(funcName, funcBody) - (fullFuncName, globalIsNull, globalValue) + ctx.addNewFunction(funcName, funcBody) + (funcName, globalIsNull, globalValue) } override def toString: String = s"if ($predicate) $trueValue else $falseValue" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index baa5ba68dcb30..e84796f2edad0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -181,7 +181,7 @@ case class Stack(children: Seq[Expression]) extends Generator { override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { // Rows - we write these into an array. val rowData = ctx.freshName("rows") - ctx.addMutableState("InternalRow[]", rowData, s"$rowData = new InternalRow[$numRows];") + ctx.addMutableState("InternalRow[]", rowData, s"this.$rowData = new InternalRow[$numRows];") val values = children.tail val dataTypes = values.take(numFields).map(_.dataType) val code = ctx.splitExpressions(ctx.INPUT_ROW, Seq.tabulate(numRows) { row => @@ -190,7 +190,7 @@ case class Stack(children: Seq[Expression]) extends Generator { if (index < values.length) values(index) else Literal(null, dataTypes(col)) } val eval = CreateStruct(fields).genCode(ctx) - s"${eval.code}\n$rowData[$row] = ${eval.value};" + s"${eval.code}\nthis.$rowData[$row] = ${eval.value};" }) // Create the collection. 
@@ -198,7 +198,7 @@ case class Stack(children: Seq[Expression]) extends Generator { ctx.addMutableState( s"$wrapperClass", ev.value, - s"${ev.value} = $wrapperClass$$.MODULE$$.make($rowData);") + s"this.${ev.value} = $wrapperClass$$.MODULE$$.make(this.$rowData);") ev.copy(code = code, isNull = "false") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 2bd752c82e6c1..1a202ecf745c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -981,7 +981,7 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp val code = s""" ${instanceGen.code} - ${javaBeanInstance} = ${instanceGen.value}; + this.${javaBeanInstance} = ${instanceGen.value}; if (!${instanceGen.isNull}) { $initializeCode } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index d7ba57a697b08..b69b74b4240bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -33,10 +33,10 @@ class GeneratedProjectionSuite extends SparkFunSuite { test("generated projections on wider table") { val N = 1000 - val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) + val wideRow1 = new GenericInternalRow((1 to N).toArray[Any]) val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) val wideRow2 = new GenericInternalRow( - (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) + (1 to N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) val schema2 = StructType((1 to N).map(i => StructField("", StringType))) val joined = new JoinedRow(wideRow1, wideRow2) val joinedSchema = StructType(schema1 ++ schema2) @@ -48,12 +48,12 @@ class GeneratedProjectionSuite extends SparkFunSuite { val unsafeProj = UnsafeProjection.create(nestedSchema) val unsafe: UnsafeRow = unsafeProj(nested) (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === unsafe.getInt(i + 2)) + val s = UTF8String.fromString((i + 1).toString) + assert(i + 1 === unsafe.getInt(i + 2)) assert(s === unsafe.getUTF8String(i + 2 + N)) - assert(i === unsafe.getStruct(0, N * 2).getInt(i)) + assert(i + 1 === unsafe.getStruct(0, N * 2).getInt(i)) assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === unsafe.getStruct(1, N * 2).getInt(i)) + assert(i + 1 === unsafe.getStruct(1, N * 2).getInt(i)) assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) } @@ -62,63 +62,13 @@ class GeneratedProjectionSuite extends SparkFunSuite { val result = safeProj(unsafe) // Can't compare GenericInternalRow with JoinedRow directly (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === result.getInt(i + 2)) + val r = i + 1 + val s = UTF8String.fromString((i + 1).toString) + assert(r === result.getInt(i + 2)) assert(s === result.getUTF8String(i + 2 + N)) - assert(i === result.getStruct(0, N * 2).getInt(i)) + assert(r === result.getStruct(0, N * 2).getInt(i)) assert(s === result.getStruct(0, N * 
2).getUTF8String(i + N)) - assert(i === result.getStruct(1, N * 2).getInt(i)) - assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) - } - - // test generated MutableProjection - val exprs = nestedSchema.fields.zipWithIndex.map { case (f, i) => - BoundReference(i, f.dataType, true) - } - val mutableProj = GenerateMutableProjection.generate(exprs) - val row1 = mutableProj(result) - assert(result === row1) - val row2 = mutableProj(result) - assert(result === row2) - } - - test("generated projections on wider table requiring class-splitting") { - val N = 4000 - val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) - val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) - val wideRow2 = new GenericInternalRow( - (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) - val schema2 = StructType((1 to N).map(i => StructField("", StringType))) - val joined = new JoinedRow(wideRow1, wideRow2) - val joinedSchema = StructType(schema1 ++ schema2) - val nested = new JoinedRow(InternalRow(joined, joined), joined) - val nestedSchema = StructType( - Seq(StructField("", joinedSchema), StructField("", joinedSchema)) ++ joinedSchema) - - // test generated UnsafeProjection - val unsafeProj = UnsafeProjection.create(nestedSchema) - val unsafe: UnsafeRow = unsafeProj(nested) - (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === unsafe.getInt(i + 2)) - assert(s === unsafe.getUTF8String(i + 2 + N)) - assert(i === unsafe.getStruct(0, N * 2).getInt(i)) - assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === unsafe.getStruct(1, N * 2).getInt(i)) - assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) - } - - // test generated SafeProjection - val safeProj = FromUnsafeProjection(nestedSchema) - val result = safeProj(unsafe) - // Can't compare GenericInternalRow with JoinedRow directly - (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === result.getInt(i + 2)) - assert(s === result.getUTF8String(i + 2 + N)) - assert(i === result.getStruct(0, N * 2).getInt(i)) - assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === result.getStruct(1, N * 2).getInt(i)) + assert(r === result.getStruct(1, N * 2).getInt(i)) assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala index 74a47da2deef2..e86116680a57a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ColumnarBatchScan.scala @@ -93,7 +93,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { } val nextBatch = ctx.freshName("nextBatch") - val nextBatchFuncName = ctx.addNewFunction(nextBatch, + ctx.addNewFunction(nextBatch, s""" |private void $nextBatch() throws java.io.IOException { | long getBatchStart = System.nanoTime(); @@ -121,7 +121,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { } s""" |if ($batch == null) { - | $nextBatchFuncName(); + | $nextBatch(); |} |while ($batch != null) { | int $numRows = $batch.numRows(); @@ -133,7 +133,7 @@ private[sql] trait ColumnarBatchScan extends CodegenSupport { | } | $idx = $numRows; | $batch = null; - | $nextBatchFuncName(); + | $nextBatch(); |} |$scanTimeMetric.add($scanTimeTotalNs / (1000 * 1000)); |$scanTimeTotalNs = 0; diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala index ff71fd4dc7bb7..f98ae82574d20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala @@ -141,7 +141,7 @@ case class SortExec( ctx.addMutableState("scala.collection.Iterator", sortedIterator, "") val addToSorter = ctx.freshName("addToSorter") - val addToSorterFuncName = ctx.addNewFunction(addToSorter, + ctx.addNewFunction(addToSorter, s""" | private void $addToSorter() throws java.io.IOException { | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} @@ -160,7 +160,7 @@ case class SortExec( s""" | if ($needToSort) { | long $spillSizeBefore = $metrics.memoryBytesSpilled(); - | $addToSorterFuncName(); + | $addToSorter(); | $sortedIterator = $sorterVariable.sort(); | $sortTime.add($sorterVariable.getSortTimeNanos() / 1000000); | $peakMemory.add($sorterVariable.getPeakMemoryUsage()); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index c7e9d25bd2cc0..c1e1a631c677e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -357,9 +357,6 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co protected void processNext() throws java.io.IOException { ${code.trim} } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """.trim diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index bf7fa07765b9a..68c8e6ce62cbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -209,7 +209,7 @@ case class HashAggregateExec( } val doAgg = ctx.freshName("doAggregateWithoutKey") - val doAggFuncName = ctx.addNewFunction(doAgg, + ctx.addNewFunction(doAgg, s""" | private void $doAgg() throws java.io.IOException { | // initialize aggregation buffer @@ -226,7 +226,7 @@ case class HashAggregateExec( | while (!$initAgg) { | $initAgg = true; | long $beforeAgg = System.nanoTime(); - | $doAggFuncName(); + | $doAgg(); | $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); | | // output the result @@ -592,7 +592,7 @@ case class HashAggregateExec( } else "" } - val doAggFuncName = ctx.addNewFunction(doAgg, + ctx.addNewFunction(doAgg, s""" ${generateGenerateCode} private void $doAgg() throws java.io.IOException { @@ -672,7 +672,7 @@ case class HashAggregateExec( if (!$initAgg) { $initAgg = true; long $beforeAgg = System.nanoTime(); - $doAggFuncName(); + $doAgg(); $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index bb24489ade1b3..bd7a5c5d914c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -281,8 +281,10 @@ case class SampleExec( val samplerClass = 
classOf[PoissonSampler[UnsafeRow]].getName val initSampler = ctx.freshName("initSampler") ctx.copyResult = true + ctx.addMutableState(s"$samplerClass", sampler, + s"$initSampler();") - val initSamplerFuncName = ctx.addNewFunction(initSampler, + ctx.addNewFunction(initSampler, s""" | private void $initSampler() { | $sampler = new $samplerClass($upperBound - $lowerBound, false); @@ -297,8 +299,6 @@ case class SampleExec( | } """.stripMargin.trim) - ctx.addMutableState(s"$samplerClass", sampler, s"$initSamplerFuncName();") - val samplingCount = ctx.freshName("samplingCount") s""" | int $samplingCount = $sampler.sample(); @@ -394,7 +394,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) // The default size of a batch, which must be positive integer val batchSize = 1000 - val initRangeFuncName = ctx.addNewFunction("initRange", + ctx.addNewFunction("initRange", s""" | private void initRange(int idx) { | $BigInt index = $BigInt.valueOf(idx); @@ -451,7 +451,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) | // initialize Range | if (!$initTerm) { | $initTerm = true; - | $initRangeFuncName(partitionIndex); + | initRange(partitionIndex); | } | | while (true) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index a66e8e2b46e3d..14024d6c10558 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -128,7 +128,9 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera } else { val groupedAccessorsItr = initializeAccessors.grouped(numberOfStatementsThreshold) val groupedExtractorsItr = extractors.grouped(numberOfStatementsThreshold) - val accessorNames = groupedAccessorsItr.zipWithIndex.map { case (body, i) => + var groupedAccessorsLength = 0 + groupedAccessorsItr.zipWithIndex.foreach { case (body, i) => + groupedAccessorsLength += 1 val funcName = s"accessors$i" val funcCode = s""" |private void $funcName() { @@ -137,7 +139,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - val extractorNames = groupedExtractorsItr.zipWithIndex.map { case (body, i) => + groupedExtractorsItr.zipWithIndex.foreach { case (body, i) => val funcName = s"extractors$i" val funcCode = s""" |private void $funcName() { @@ -146,8 +148,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - (accessorNames.map { accessorName => s"$accessorName();" }.mkString("\n"), - extractorNames.map { extractorName => s"$extractorName();" }.mkString("\n")) + ((0 to groupedAccessorsLength - 1).map { i => s"accessors$i();" }.mkString("\n"), + (0 to groupedAccessorsLength - 1).map { i => s"extractors$i();" }.mkString("\n")) } val codeBody = s""" @@ -222,9 +224,6 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera unsafeRow.setTotalSize(bufferHolder.totalSize()); return unsafeRow; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 8445c26eeee58..26fb6103953fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -478,7 +478,7 @@ case class SortMergeJoinExec( | } | return false; // unreachable |} - """.stripMargin, inlineToOuterClass = true) + """.stripMargin) (leftRow, matches) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 73a0f8735ed45..757fe2185d302 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -75,7 +75,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { protected boolean stopEarly() { return $stopEarly; } - """, inlineToOuterClass = true) + """) val countTerm = ctx.freshName("count") ctx.addMutableState("int", countTerm, s"$countTerm = 0;") s""" From b6749ba09724b3ed19166e7bb0b1fdcca79a44ba Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Fri, 23 Jun 2017 20:44:25 +0800 Subject: [PATCH 1033/1204] [SPARK-21165] [SQL] [2.2] Use executedPlan instead of analyzedPlan in INSERT AS SELECT [WIP] ### What changes were proposed in this pull request? The input query schema of INSERT AS SELECT could be changed after optimization. For example, the following query's output schema is changed by the rule `SimplifyCasts` and `RemoveRedundantAliases`. ```SQL SELECT word, length, cast(first as string) as first FROM view1 ``` This PR is to fix the issue in Spark 2.2. Instead of using the analyzed plan of the input query, this PR use its executed plan to determine the attributes in `FileFormatWriter`. The related issue in the master branch has been fixed by https://github.com/apache/spark/pull/18064. After this PR is merged, I will submit a separate PR to merge the test case to the master. ### How was this patch tested? Added a test case Author: Xiao Li Author: gatorsmile Closes #18386 from gatorsmile/newRC5. --- .../execution/datasources/DataSource.scala | 12 +--------- .../datasources/DataSourceStrategy.scala | 4 +--- .../datasources/FileFormatWriter.scala | 15 ++++++++++--- .../InsertIntoHadoopFsRelationCommand.scala | 10 ++++----- .../sql/execution/datasources/rules.scala | 16 +++++--------- .../execution/streaming/FileStreamSink.scala | 11 +--------- .../hive/execution/InsertIntoHiveTable.scala | 9 +------- .../sql/hive/InsertIntoHiveTableSuite.scala | 22 +++++++++++++++++++ 8 files changed, 48 insertions(+), 51 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 14c40605ea31c..4ffe2151ae638 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -408,16 +408,6 @@ case class DataSource( val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis PartitioningUtils.validatePartitionColumn(data.schema, partitionColumns, caseSensitive) - // SPARK-17230: Resolve the partition columns so InsertIntoHadoopFsRelationCommand does - // not need to have the query as child, to avoid to analyze an optimized query, - // because InsertIntoHadoopFsRelationCommand will be optimized first. 
- val partitionAttributes = partitionColumns.map { name => - val plan = data.logicalPlan - plan.resolve(name :: Nil, data.sparkSession.sessionState.analyzer.resolver).getOrElse { - throw new AnalysisException( - s"Unable to resolve $name given [${plan.output.map(_.name).mkString(", ")}]") - }.asInstanceOf[Attribute] - } val fileIndex = catalogTable.map(_.identifier).map { tableIdent => sparkSession.table(tableIdent).queryExecution.analyzed.collect { case LogicalRelation(t: HadoopFsRelation, _, _) => t.location @@ -431,7 +421,7 @@ case class DataSource( outputPath = outputPath, staticPartitions = Map.empty, ifPartitionNotExists = false, - partitionColumns = partitionAttributes, + partitionColumns = partitionColumns, bucketSpec = bucketSpec, fileFormat = format, options = options, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index e05a8d5f02bd8..ded9303de55fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -188,15 +188,13 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast "Cannot overwrite a path that is also being read from.") } - val partitionSchema = actualQuery.resolve( - t.partitionSchema, t.sparkSession.sessionState.analyzer.resolver) val staticPartitions = parts.filter(_._2.nonEmpty).map { case (k, v) => k -> v.get } InsertIntoHadoopFsRelationCommand( outputPath, staticPartitions, i.ifPartitionNotExists, - partitionSchema, + partitionColumns = t.partitionSchema.map(_.name), t.bucketSpec, t.fileFormat, t.options, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 4ec09bff429c5..2c31d2a84c258 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -101,7 +101,7 @@ object FileFormatWriter extends Logging { committer: FileCommitProtocol, outputSpec: OutputSpec, hadoopConf: Configuration, - partitionColumns: Seq[Attribute], + partitionColumnNames: Seq[String], bucketSpec: Option[BucketSpec], refreshFunction: (Seq[TablePartitionSpec]) => Unit, options: Map[String, String]): Unit = { @@ -111,9 +111,18 @@ object FileFormatWriter extends Logging { job.setOutputValueClass(classOf[InternalRow]) FileOutputFormat.setOutputPath(job, new Path(outputSpec.outputPath)) - val allColumns = queryExecution.logical.output + val allColumns = queryExecution.executedPlan.output + // Get the actual partition columns as attributes after matching them by name with + // the given columns names. 
+ val partitionColumns = partitionColumnNames.map { col => + val nameEquality = sparkSession.sessionState.conf.resolver + allColumns.find(f => nameEquality(f.name, col)).getOrElse { + throw new RuntimeException( + s"Partition column $col not found in schema ${queryExecution.executedPlan.schema}") + } + } val partitionSet = AttributeSet(partitionColumns) - val dataColumns = queryExecution.logical.output.filterNot(partitionSet.contains) + val dataColumns = allColumns.filterNot(partitionSet.contains) val bucketIdExpression = bucketSpec.map { spec => val bucketColumns = spec.bucketColumnNames.map(c => dataColumns.find(_.name == c).get) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index c9d31449d3629..ab35fdcbc1f25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -44,7 +44,7 @@ case class InsertIntoHadoopFsRelationCommand( outputPath: Path, staticPartitions: TablePartitionSpec, ifPartitionNotExists: Boolean, - partitionColumns: Seq[Attribute], + partitionColumns: Seq[String], bucketSpec: Option[BucketSpec], fileFormat: FileFormat, options: Map[String, String], @@ -150,7 +150,7 @@ case class InsertIntoHadoopFsRelationCommand( outputSpec = FileFormatWriter.OutputSpec( qualifiedOutputPath.toString, customPartitionLocations), hadoopConf = hadoopConf, - partitionColumns = partitionColumns, + partitionColumnNames = partitionColumns, bucketSpec = bucketSpec, refreshFunction = refreshPartitionsCallback, options = options) @@ -176,10 +176,10 @@ case class InsertIntoHadoopFsRelationCommand( customPartitionLocations: Map[TablePartitionSpec, String], committer: FileCommitProtocol): Unit = { val staticPartitionPrefix = if (staticPartitions.nonEmpty) { - "/" + partitionColumns.flatMap { p => - staticPartitions.get(p.name) match { + "/" + partitionColumns.flatMap { col => + staticPartitions.get(col) match { case Some(value) => - Some(escapePathName(p.name) + "=" + escapePathName(value)) + Some(escapePathName(col) + "=" + escapePathName(value)) case None => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 3f4a78580f1eb..45f2a41f24937 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -127,11 +127,11 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi val resolver = sparkSession.sessionState.conf.resolver val tableCols = existingTable.schema.map(_.name) - // As we are inserting into an existing table, we should respect the existing schema and - // adjust the column order of the given dataframe according to it, or throw exception - // if the column names do not match. + // As we are inserting into an existing table, we should respect the existing schema, preserve + // the case and adjust the column order of the given DataFrame according to it, or throw + // an exception if the column names do not match. 
val adjustedColumns = tableCols.map { col => - query.resolve(Seq(col), resolver).getOrElse { + query.resolve(Seq(col), resolver).map(Alias(_, col)()).getOrElse { val inputColumns = query.schema.map(_.name).mkString(", ") throw new AnalysisException( s"cannot resolve '$col' given input columns: [$inputColumns]") @@ -168,15 +168,9 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi """.stripMargin) } - val newQuery = if (adjustedColumns != query.output) { - Project(adjustedColumns, query) - } else { - query - } - c.copy( tableDesc = existingTable, - query = Some(newQuery)) + query = Some(Project(adjustedColumns, query))) // Here we normalize partition, bucket and sort column names, w.r.t. the case sensitivity // config, and do various checks: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala index 6885d0bf67ccb..2a652920c10c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala @@ -111,15 +111,6 @@ class FileStreamSink( case _ => // Do nothing } - // Get the actual partition columns as attributes after matching them by name with - // the given columns names. - val partitionColumns: Seq[Attribute] = partitionColumnNames.map { col => - val nameEquality = data.sparkSession.sessionState.conf.resolver - data.logicalPlan.output.find(f => nameEquality(f.name, col)).getOrElse { - throw new RuntimeException(s"Partition column $col not found in schema ${data.schema}") - } - } - FileFormatWriter.write( sparkSession = sparkSession, queryExecution = data.queryExecution, @@ -127,7 +118,7 @@ class FileStreamSink( committer = committer, outputSpec = FileFormatWriter.OutputSpec(path, Map.empty), hadoopConf = hadoopConf, - partitionColumns = partitionColumns, + partitionColumnNames = partitionColumnNames, bucketSpec = None, refreshFunction = _ => (), options = options) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 35f65e972fe27..797481c879e7a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -314,13 +314,6 @@ case class InsertIntoHiveTable( outputPath = tmpLocation.toString, isAppend = false) - val partitionAttributes = partitionColumnNames.takeRight(numDynamicPartitions).map { name => - query.resolve(name :: Nil, sparkSession.sessionState.analyzer.resolver).getOrElse { - throw new AnalysisException( - s"Unable to resolve $name given [${query.output.map(_.name).mkString(", ")}]") - }.asInstanceOf[Attribute] - } - FileFormatWriter.write( sparkSession = sparkSession, queryExecution = Dataset.ofRows(sparkSession, query).queryExecution, @@ -328,7 +321,7 @@ case class InsertIntoHiveTable( committer = committer, outputSpec = FileFormatWriter.OutputSpec(tmpLocation.toString, Map.empty), hadoopConf = hadoopConf, - partitionColumns = partitionAttributes, + partitionColumnNames = partitionColumnNames.takeRight(numDynamicPartitions), bucketSpec = None, refreshFunction = _ => (), options = Map.empty) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala index 58ab0c252bfd7..618e5b68ff8c0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala @@ -468,6 +468,28 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef } } + test("SPARK-21165: the query schema of INSERT is changed after optimization") { + withSQLConf(("hive.exec.dynamic.partition.mode", "nonstrict")) { + withTable("tab1", "tab2") { + Seq(("a", "b", 3)).toDF("word", "first", "length").write.saveAsTable("tab1") + + spark.sql( + """ + |CREATE TABLE tab2 (word string, length int) + |PARTITIONED BY (first string) + """.stripMargin) + + spark.sql( + """ + |INSERT INTO TABLE tab2 PARTITION(first) + |SELECT word, length, cast(first as string) as first FROM tab1 + """.stripMargin) + + checkAnswer(spark.table("tab2"), Row("a", 3, "b")) + } + } + } + testPartitionedTable("insertInto() should reject extra columns") { tableName => sql("CREATE TABLE t (a INT, b INT, c INT, d INT, e INT)") From 9d29808324dcb4c194b3557c093e30eda5ce135d Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 23 Jun 2017 09:28:02 -0700 Subject: [PATCH 1034/1204] [SPARK-21144][SQL] Print a warning if the data schema and partition schema have the duplicate columns ## What changes were proposed in this pull request? The current master outputs unexpected results when the data schema and partition schema have the duplicate columns: ``` withTempPath { dir => val basePath = dir.getCanonicalPath spark.range(0, 3).toDF("foo").write.parquet(new Path(basePath, "foo=1").toString) spark.range(0, 3).toDF("foo").write.parquet(new Path(basePath, "foo=a").toString) spark.read.parquet(basePath).show() } +---+ |foo| +---+ | 1| | 1| | a| | a| | 1| | a| +---+ ``` This patch added code to print a warning when the duplication found. ## How was this patch tested? Manually checked. Author: Takeshi Yamamuro Closes #18375 from maropu/SPARK-21144-3. (cherry picked from commit f3dea60793d86212ba1068e88ad89cb3dcf07801) Signed-off-by: gatorsmile --- .../apache/spark/sql/util/SchemaUtils.scala | 53 +++++++++++++++++++ .../execution/datasources/DataSource.scala | 6 +++ 2 files changed, 59 insertions(+) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala new file mode 100644 index 0000000000000..e881685ce6262 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.util + +import org.apache.spark.internal.Logging + + +/** + * Utils for handling schemas. + * + * TODO: Merge this file with [[org.apache.spark.ml.util.SchemaUtils]]. + */ +private[spark] object SchemaUtils extends Logging { + + /** + * Checks if input column names have duplicate identifiers. Prints a warning message if + * the duplication exists. + * + * @param columnNames column names to check + * @param colType column type name, used in a warning message + * @param caseSensitiveAnalysis whether duplication checks should be case sensitive or not + */ + def checkColumnNameDuplication( + columnNames: Seq[String], colType: String, caseSensitiveAnalysis: Boolean): Unit = { + val names = if (caseSensitiveAnalysis) { + columnNames + } else { + columnNames.map(_.toLowerCase) + } + if (names.distinct.length != names.length) { + val duplicateColumns = names.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"`$x`" + } + logWarning(s"Found duplicate column(s) $colType: ${duplicateColumns.mkString(", ")}. " + + "You might need to assign different column names.") + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 4ffe2151ae638..0915bd3ba25b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.sources._ import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{CalendarIntervalType, StructType} +import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.Utils /** @@ -181,6 +182,11 @@ case class DataSource( throw new AnalysisException( s"Unable to infer schema for $format. It must be specified manually.") } + + SchemaUtils.checkColumnNameDuplication( + (dataSchema ++ partitionSchema).map(_.name), "in the data schema and the partition schema", + sparkSession.sessionState.conf.caseSensitiveAnalysis) + (dataSchema, partitionSchema) } From f160267384d7b8b5f423c8f4f948c6ac9b5eb49e Mon Sep 17 00:00:00 2001 From: Dhruve Ashar Date: Fri, 23 Jun 2017 10:36:29 -0700 Subject: [PATCH 1035/1204] [SPARK-21181] Release byteBuffers to suppress netty error messages ## What changes were proposed in this pull request? We are explicitly calling release on the byteBuf's used to encode the string to Base64 to suppress the memory leak error message reported by netty. This is to make it less confusing for the user. ### Changes proposed in this fix By explicitly invoking release on the byteBuf's we are decrement the internal reference counts for the wrappedByteBuf's. Now, when the GC kicks in, these would be reclaimed as before, just that netty wouldn't report any memory leak error messages as the internal ref. counts are now 0. ## How was this patch tested? Ran a few spark-applications and examined the logs. The error message no longer appears. Original PR was opened against branch-2.1 => https://github.com/apache/spark/pull/18392 Author: Dhruve Ashar Closes #18407 from dhruve/master. 
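For illustration only, the release pattern described above looks roughly like this in Scala (a minimal sketch assuming the standard netty 4 `ByteBuf` API; `base64Encode` is a hypothetical helper name — the actual change is the private `getBase64EncodedString` method in the Java diff below):

```scala
// Sketch of the pattern only; not part of the patch.
import java.nio.charset.StandardCharsets

import io.netty.buffer.{ByteBuf, Unpooled}
import io.netty.handler.codec.base64.Base64

def base64Encode(str: String): String = {
  var raw: ByteBuf = null
  var encoded: ByteBuf = null
  try {
    raw = Unpooled.wrappedBuffer(str.getBytes(StandardCharsets.UTF_8))
    encoded = Base64.encode(raw)
    encoded.toString(StandardCharsets.UTF_8)
  } finally {
    // Explicitly dropping the reference counts to zero keeps netty's leak detector quiet;
    // the underlying memory is still reclaimed by GC exactly as before.
    if (raw != null) {
      raw.release()
      if (encoded != null) {
        encoded.release()
      }
    }
  }
}
```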
(cherry picked from commit 1ebe7ffe072bcac03360e65e959a6cd36530a9c4) Signed-off-by: Marcelo Vanzin --- .../spark/network/sasl/SparkSaslServer.java | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java index e24fdf0c74de3..00f3e83dbc8b3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java @@ -34,6 +34,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; +import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.base64.Base64; import org.slf4j.Logger; @@ -187,14 +188,31 @@ public void handle(Callback[] callbacks) throws IOException, UnsupportedCallback /* Encode a byte[] identifier as a Base64-encoded string. */ public static String encodeIdentifier(String identifier) { Preconditions.checkNotNull(identifier, "User cannot be null if SASL is enabled"); - return Base64.encode(Unpooled.wrappedBuffer(identifier.getBytes(StandardCharsets.UTF_8))) - .toString(StandardCharsets.UTF_8); + return getBase64EncodedString(identifier); } /** Encode a password as a base64-encoded char[] array. */ public static char[] encodePassword(String password) { Preconditions.checkNotNull(password, "Password cannot be null if SASL is enabled"); - return Base64.encode(Unpooled.wrappedBuffer(password.getBytes(StandardCharsets.UTF_8))) - .toString(StandardCharsets.UTF_8).toCharArray(); + return getBase64EncodedString(password).toCharArray(); + } + + /** Return a Base64-encoded string. */ + private static String getBase64EncodedString(String str) { + ByteBuf byteBuf = null; + ByteBuf encodedByteBuf = null; + try { + byteBuf = Unpooled.wrappedBuffer(str.getBytes(StandardCharsets.UTF_8)); + encodedByteBuf = Base64.encode(byteBuf); + return encodedByteBuf.toString(StandardCharsets.UTF_8); + } finally { + // The release is called to suppress the memory leak error messages raised by netty. + if (byteBuf != null) { + byteBuf.release(); + if (encodedByteBuf != null) { + encodedByteBuf.release(); + } + } + } } } From f8fd3b48b7fdfe1808cf871c215d9dbed35040c0 Mon Sep 17 00:00:00 2001 From: Dhruve Ashar Date: Fri, 23 Jun 2017 10:36:29 -0700 Subject: [PATCH 1036/1204] [SPARK-21181] Release byteBuffers to suppress netty error messages ## What changes were proposed in this pull request? We are explicitly calling release on the byteBuf's used to encode the string to Base64 to suppress the memory leak error message reported by netty. This is to make it less confusing for the user. ### Changes proposed in this fix By explicitly invoking release on the byteBuf's we are decrement the internal reference counts for the wrappedByteBuf's. Now, when the GC kicks in, these would be reclaimed as before, just that netty wouldn't report any memory leak error messages as the internal ref. counts are now 0. ## How was this patch tested? Ran a few spark-applications and examined the logs. The error message no longer appears. Original PR was opened against branch-2.1 => https://github.com/apache/spark/pull/18392 Author: Dhruve Ashar Closes #18407 from dhruve/master. 
(cherry picked from commit 1ebe7ffe072bcac03360e65e959a6cd36530a9c4) Signed-off-by: Marcelo Vanzin --- .../spark/network/sasl/SparkSaslServer.java | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java index e24fdf0c74de3..00f3e83dbc8b3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java @@ -34,6 +34,7 @@ import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; +import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.base64.Base64; import org.slf4j.Logger; @@ -187,14 +188,31 @@ public void handle(Callback[] callbacks) throws IOException, UnsupportedCallback /* Encode a byte[] identifier as a Base64-encoded string. */ public static String encodeIdentifier(String identifier) { Preconditions.checkNotNull(identifier, "User cannot be null if SASL is enabled"); - return Base64.encode(Unpooled.wrappedBuffer(identifier.getBytes(StandardCharsets.UTF_8))) - .toString(StandardCharsets.UTF_8); + return getBase64EncodedString(identifier); } /** Encode a password as a base64-encoded char[] array. */ public static char[] encodePassword(String password) { Preconditions.checkNotNull(password, "Password cannot be null if SASL is enabled"); - return Base64.encode(Unpooled.wrappedBuffer(password.getBytes(StandardCharsets.UTF_8))) - .toString(StandardCharsets.UTF_8).toCharArray(); + return getBase64EncodedString(password).toCharArray(); + } + + /** Return a Base64-encoded string. */ + private static String getBase64EncodedString(String str) { + ByteBuf byteBuf = null; + ByteBuf encodedByteBuf = null; + try { + byteBuf = Unpooled.wrappedBuffer(str.getBytes(StandardCharsets.UTF_8)); + encodedByteBuf = Base64.encode(byteBuf); + return encodedByteBuf.toString(StandardCharsets.UTF_8); + } finally { + // The release is called to suppress the memory leak error messages raised by netty. + if (byteBuf != null) { + byteBuf.release(); + if (encodedByteBuf != null) { + encodedByteBuf.release(); + } + } + } } } From 3394b0641c72d071d23d71daae813e03f44ab726 Mon Sep 17 00:00:00 2001 From: Ong Ming Yang Date: Fri, 23 Jun 2017 10:56:59 -0700 Subject: [PATCH 1037/1204] [MINOR][DOCS] Docs in DataFrameNaFunctions.scala use wrong method ## What changes were proposed in this pull request? * Following the first few examples in this file, the remaining methods should also be methods of `df.na` not `df`. * Filled in some missing parentheses ## How was this patch tested? N/A Author: Ong Ming Yang Closes #18398 from ongmingyang/master. 
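For illustration, a minimal caller-side sketch of the corrected usage documented by this change (the data, column names, and replacement values here are invented for the example; the diff below only fixes the scaladoc samples):

```scala
// Sketch only: the na-functions are reached through df.na, not df.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").appName("na-example").getOrCreate()
import spark.implicits._

val df = Seq(("UNKNOWN", Double.NaN), ("alice", 1.65)).toDF("name", "height")

// Replace a sentinel string value in one column.
val renamed = df.na.replace("name", Map("UNKNOWN" -> "unnamed"))
// Fill NaN/null numeric values in another column.
val filled = renamed.na.fill(0.0, Seq("height"))

filled.show()
```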
(cherry picked from commit 4cc62951a2b12a372a2b267bf8597a0a31e2b2cb) Signed-off-by: Xiao Li --- .../spark/sql/DataFrameNaFunctions.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 052d85ad33bd6..1d88992c48562 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -244,13 +244,13 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * * // Replaces all occurrences of 1.0 with 2.0 in column "height". - * df.replace("height", ImmutableMap.of(1.0, 2.0)); + * df.na.replace("height", ImmutableMap.of(1.0, 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". - * df.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); + * df.na.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. - * df.replace("*", ImmutableMap.of("UNKNOWN", "unnamed")); + * df.na.replace("*", ImmutableMap.of("UNKNOWN", "unnamed")); * }}} * * @param col name of the column to apply the value replacement @@ -271,10 +271,10 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * * // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". - * df.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); + * df.na.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". - * df.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed")); + * df.na.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed")); * }}} * * @param cols list of columns to apply the value replacement @@ -295,13 +295,13 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * {{{ * // Replaces all occurrences of 1.0 with 2.0 in column "height". - * df.replace("height", Map(1.0 -> 2.0)) + * df.na.replace("height", Map(1.0 -> 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". - * df.replace("name", Map("UNKNOWN" -> "unnamed") + * df.na.replace("name", Map("UNKNOWN" -> "unnamed")); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. - * df.replace("*", Map("UNKNOWN" -> "unnamed") + * df.na.replace("*", Map("UNKNOWN" -> "unnamed")); * }}} * * @param col name of the column to apply the value replacement @@ -324,10 +324,10 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * {{{ * // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". - * df.replace("height" :: "weight" :: Nil, Map(1.0 -> 2.0)); + * df.na.replace("height" :: "weight" :: Nil, Map(1.0 -> 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". 
- * df.replace("firstname" :: "lastname" :: Nil, Map("UNKNOWN" -> "unnamed"); + * df.na.replace("firstname" :: "lastname" :: Nil, Map("UNKNOWN" -> "unnamed")); * }}} * * @param cols list of columns to apply the value replacement From bcaf06c497b3bf99521b094eff71c9b9168064c7 Mon Sep 17 00:00:00 2001 From: Ong Ming Yang Date: Fri, 23 Jun 2017 10:56:59 -0700 Subject: [PATCH 1038/1204] [MINOR][DOCS] Docs in DataFrameNaFunctions.scala use wrong method ## What changes were proposed in this pull request? * Following the first few examples in this file, the remaining methods should also be methods of `df.na` not `df`. * Filled in some missing parentheses ## How was this patch tested? N/A Author: Ong Ming Yang Closes #18398 from ongmingyang/master. (cherry picked from commit 4cc62951a2b12a372a2b267bf8597a0a31e2b2cb) Signed-off-by: Xiao Li --- .../spark/sql/DataFrameNaFunctions.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 3fbc39142cd29..323950b661604 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -243,13 +243,13 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * * // Replaces all occurrences of 1.0 with 2.0 in column "height". - * df.replace("height", ImmutableMap.of(1.0, 2.0)); + * df.na.replace("height", ImmutableMap.of(1.0, 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". - * df.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); + * df.na.replace("name", ImmutableMap.of("UNKNOWN", "unnamed")); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. - * df.replace("*", ImmutableMap.of("UNKNOWN", "unnamed")); + * df.na.replace("*", ImmutableMap.of("UNKNOWN", "unnamed")); * }}} * * @param col name of the column to apply the value replacement @@ -270,10 +270,10 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * * // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". - * df.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); + * df.na.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". - * df.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed")); + * df.na.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed")); * }}} * * @param cols list of columns to apply the value replacement @@ -294,13 +294,13 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * {{{ * // Replaces all occurrences of 1.0 with 2.0 in column "height". - * df.replace("height", Map(1.0 -> 2.0)) + * df.na.replace("height", Map(1.0 -> 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name". - * df.replace("name", Map("UNKNOWN" -> "unnamed") + * df.na.replace("name", Map("UNKNOWN" -> "unnamed")); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns. 
- * df.replace("*", Map("UNKNOWN" -> "unnamed") + * df.na.replace("*", Map("UNKNOWN" -> "unnamed")); * }}} * * @param col name of the column to apply the value replacement @@ -323,10 +323,10 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * {{{ * // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight". - * df.replace("height" :: "weight" :: Nil, Map(1.0 -> 2.0)); + * df.na.replace("height" :: "weight" :: Nil, Map(1.0 -> 2.0)); * * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname". - * df.replace("firstname" :: "lastname" :: Nil, Map("UNKNOWN" -> "unnamed"); + * df.na.replace("firstname" :: "lastname" :: Nil, Map("UNKNOWN" -> "unnamed")); * }}} * * @param cols list of columns to apply the value replacement From a3088d23a7d921618687a9af4e7aa083e8dd0e2b Mon Sep 17 00:00:00 2001 From: Gabor Feher Date: Fri, 23 Jun 2017 21:53:38 -0700 Subject: [PATCH 1039/1204] [SPARK-20555][SQL] Fix mapping of Oracle DECIMAL types to Spark types in read path ## What changes were proposed in this pull request? This PR is to revert some code changes in the read path of https://github.com/apache/spark/pull/14377. The original fix is https://github.com/apache/spark/pull/17830 When merging this PR, please give the credit to gaborfeher ## How was this patch tested? Added a test case to OracleIntegrationSuite.scala Author: Gabor Feher Author: gatorsmile Closes #18408 from gatorsmile/OracleType. (cherry picked from commit b837bf9ae97cf7ee7558c10a5a34636e69367a05) Signed-off-by: gatorsmile --- .../sql/jdbc/OracleIntegrationSuite.scala | 65 +++++++++++++------ .../apache/spark/sql/jdbc/OracleDialect.scala | 4 -- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 1bb89a361ca75..56460d79a208c 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, Date, Timestamp} import java.util.Properties +import java.math.BigDecimal import org.apache.spark.sql.Row import org.apache.spark.sql.test.SharedSQLContext @@ -87,8 +88,31 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo |USING org.apache.spark.sql.jdbc |OPTIONS (url '$jdbcUrl', dbTable 'datetime1', oracle.jdbc.mapDateToTimestamp 'false') """.stripMargin.replaceAll("\n", " ")) + + + conn.prepareStatement("CREATE TABLE numerics (b DECIMAL(1), f DECIMAL(3, 2), i DECIMAL(10))").executeUpdate(); + conn.prepareStatement( + "INSERT INTO numerics VALUES (4, 1.23, 9999999999)").executeUpdate(); + conn.commit(); } + + test("SPARK-16625 : Importing Oracle numeric types") { + val df = sqlContext.read.jdbc(jdbcUrl, "numerics", new Properties); + val rows = df.collect() + assert(rows.size == 1) + val row = rows(0) + // The main point of the below assertions is not to make sure that these Oracle types are + // mapped to decimal types, but to make sure that the returned values are correct. 
+ // A value > 1 from DECIMAL(1) is correct: + assert(row.getDecimal(0).compareTo(BigDecimal.valueOf(4)) == 0) + // A value with fractions from DECIMAL(3, 2) is correct: + assert(row.getDecimal(1).compareTo(BigDecimal.valueOf(1.23)) == 0) + // A value > Int.MaxValue from DECIMAL(10) is correct: + assert(row.getDecimal(2).compareTo(BigDecimal.valueOf(9999999999l)) == 0) + } + + test("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") { // create a sample dataframe with string type val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x") @@ -148,27 +172,28 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo val dfRead = spark.read.jdbc(jdbcUrl, tableName, props) val rows = dfRead.collect() // verify the data type is inserted - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types(0).equals("class java.lang.Boolean")) - assert(types(1).equals("class java.lang.Integer")) - assert(types(2).equals("class java.lang.Long")) - assert(types(3).equals("class java.lang.Float")) - assert(types(4).equals("class java.lang.Float")) - assert(types(5).equals("class java.lang.Integer")) - assert(types(6).equals("class java.lang.Integer")) - assert(types(7).equals("class java.lang.String")) - assert(types(8).equals("class [B")) - assert(types(9).equals("class java.sql.Date")) - assert(types(10).equals("class java.sql.Timestamp")) + val types = dfRead.schema.map(field => field.dataType) + assert(types(0).equals(DecimalType(1, 0))) + assert(types(1).equals(DecimalType(10, 0))) + assert(types(2).equals(DecimalType(19, 0))) + assert(types(3).equals(DecimalType(19, 4))) + assert(types(4).equals(DecimalType(19, 4))) + assert(types(5).equals(DecimalType(3, 0))) + assert(types(6).equals(DecimalType(5, 0))) + assert(types(7).equals(StringType)) + assert(types(8).equals(BinaryType)) + assert(types(9).equals(DateType)) + assert(types(10).equals(TimestampType)) + // verify the value is the inserted correct or not val values = rows(0) - assert(values.getBoolean(0).equals(booleanVal)) - assert(values.getInt(1).equals(integerVal)) - assert(values.getLong(2).equals(longVal)) - assert(values.getFloat(3).equals(floatVal)) - assert(values.getFloat(4).equals(doubleVal.toFloat)) - assert(values.getInt(5).equals(byteVal.toInt)) - assert(values.getInt(6).equals(shortVal.toInt)) + assert(values.getDecimal(0).compareTo(BigDecimal.valueOf(1)) == 0) + assert(values.getDecimal(1).compareTo(BigDecimal.valueOf(integerVal)) == 0) + assert(values.getDecimal(2).compareTo(BigDecimal.valueOf(longVal)) == 0) + assert(values.getDecimal(3).compareTo(BigDecimal.valueOf(floatVal)) == 0) + assert(values.getDecimal(4).compareTo(BigDecimal.valueOf(doubleVal)) == 0) + assert(values.getDecimal(5).compareTo(BigDecimal.valueOf(byteVal)) == 0) + assert(values.getDecimal(6).compareTo(BigDecimal.valueOf(shortVal)) == 0) assert(values.getString(7).equals(stringVal)) assert(values.getAs[Array[Byte]](8).mkString.equals("678")) assert(values.getDate(9).equals(dateVal)) @@ -177,7 +202,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo test("SPARK-19318: connection property keys should be case-sensitive") { def checkRow(row: Row): Unit = { - assert(row.getInt(0) == 1) + assert(row.getDecimal(0).equals(BigDecimal.valueOf(1))) assert(row.getDate(1).equals(Date.valueOf("1991-11-09"))) assert(row.getTimestamp(2).equals(Timestamp.valueOf("1996-01-01 01:23:45"))) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index f541996b651e9..20e634c06b610 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -43,10 +43,6 @@ private case object OracleDialect extends JdbcDialect { // Not sure if there is a more robust way to identify the field as a float (or other // numeric types that do not specify a scale. case _ if scale == -127L => Option(DecimalType(DecimalType.MAX_PRECISION, 10)) - case 1 => Option(BooleanType) - case 3 | 5 | 10 => Option(IntegerType) - case 19 if scale == 0L => Option(LongType) - case 19 if scale == 4L => Option(FloatType) case _ => None } } else { From f12883e3232c50a01fc20e0520bb0f4099d2c79a Mon Sep 17 00:00:00 2001 From: Gabor Feher Date: Fri, 23 Jun 2017 21:53:38 -0700 Subject: [PATCH 1040/1204] [SPARK-20555][SQL] Fix mapping of Oracle DECIMAL types to Spark types in read path This PR is to revert some code changes in the read path of https://github.com/apache/spark/pull/14377. The original fix is https://github.com/apache/spark/pull/17830 When merging this PR, please give the credit to gaborfeher Added a test case to OracleIntegrationSuite.scala Author: Gabor Feher Author: gatorsmile Closes #18408 from gatorsmile/OracleType. --- .../sql/jdbc/OracleIntegrationSuite.scala | 61 +++++++++++++------ .../apache/spark/sql/jdbc/OracleDialect.scala | 4 -- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 8c880f3ee5fa2..e111e17b34164 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, Date, Timestamp} import java.util.Properties +import java.math.BigDecimal import org.apache.spark.sql.Row import org.apache.spark.sql.test.SharedSQLContext @@ -62,8 +63,29 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo } override def dataPreparation(conn: Connection): Unit = { + conn.prepareStatement("CREATE TABLE numerics (b DECIMAL(1), f DECIMAL(3, 2), i DECIMAL(10))").executeUpdate(); + conn.prepareStatement( + "INSERT INTO numerics VALUES (4, 1.23, 9999999999)").executeUpdate(); + conn.commit(); } + + test("SPARK-16625 : Importing Oracle numeric types") { + val df = sqlContext.read.jdbc(jdbcUrl, "numerics", new Properties); + val rows = df.collect() + assert(rows.size == 1) + val row = rows(0) + // The main point of the below assertions is not to make sure that these Oracle types are + // mapped to decimal types, but to make sure that the returned values are correct. 
+ // A value > 1 from DECIMAL(1) is correct: + assert(row.getDecimal(0).compareTo(BigDecimal.valueOf(4)) == 0) + // A value with fractions from DECIMAL(3, 2) is correct: + assert(row.getDecimal(1).compareTo(BigDecimal.valueOf(1.23)) == 0) + // A value > Int.MaxValue from DECIMAL(10) is correct: + assert(row.getDecimal(2).compareTo(BigDecimal.valueOf(9999999999l)) == 0) + } + + test("SPARK-12941: String datatypes to be mapped to Varchar in Oracle") { // create a sample dataframe with string type val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x") @@ -123,27 +145,28 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo val dfRead = spark.read.jdbc(jdbcUrl, tableName, props) val rows = dfRead.collect() // verify the data type is inserted - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types(0).equals("class java.lang.Boolean")) - assert(types(1).equals("class java.lang.Integer")) - assert(types(2).equals("class java.lang.Long")) - assert(types(3).equals("class java.lang.Float")) - assert(types(4).equals("class java.lang.Float")) - assert(types(5).equals("class java.lang.Integer")) - assert(types(6).equals("class java.lang.Integer")) - assert(types(7).equals("class java.lang.String")) - assert(types(8).equals("class [B")) - assert(types(9).equals("class java.sql.Date")) - assert(types(10).equals("class java.sql.Timestamp")) + val types = dfRead.schema.map(field => field.dataType) + assert(types(0).equals(DecimalType(1, 0))) + assert(types(1).equals(DecimalType(10, 0))) + assert(types(2).equals(DecimalType(19, 0))) + assert(types(3).equals(DecimalType(19, 4))) + assert(types(4).equals(DecimalType(19, 4))) + assert(types(5).equals(DecimalType(3, 0))) + assert(types(6).equals(DecimalType(5, 0))) + assert(types(7).equals(StringType)) + assert(types(8).equals(BinaryType)) + assert(types(9).equals(DateType)) + assert(types(10).equals(TimestampType)) + // verify the value is the inserted correct or not val values = rows(0) - assert(values.getBoolean(0).equals(booleanVal)) - assert(values.getInt(1).equals(integerVal)) - assert(values.getLong(2).equals(longVal)) - assert(values.getFloat(3).equals(floatVal)) - assert(values.getFloat(4).equals(doubleVal.toFloat)) - assert(values.getInt(5).equals(byteVal.toInt)) - assert(values.getInt(6).equals(shortVal.toInt)) + assert(values.getDecimal(0).compareTo(BigDecimal.valueOf(1)) == 0) + assert(values.getDecimal(1).compareTo(BigDecimal.valueOf(integerVal)) == 0) + assert(values.getDecimal(2).compareTo(BigDecimal.valueOf(longVal)) == 0) + assert(values.getDecimal(3).compareTo(BigDecimal.valueOf(floatVal)) == 0) + assert(values.getDecimal(4).compareTo(BigDecimal.valueOf(doubleVal)) == 0) + assert(values.getDecimal(5).compareTo(BigDecimal.valueOf(byteVal)) == 0) + assert(values.getDecimal(6).compareTo(BigDecimal.valueOf(shortVal)) == 0) assert(values.getString(7).equals(stringVal)) assert(values.getAs[Array[Byte]](8).mkString.equals("678")) assert(values.getDate(9).equals(dateVal)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index f541996b651e9..20e634c06b610 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -43,10 +43,6 @@ private case object OracleDialect extends JdbcDialect { // Not sure if there is a more robust way to identify the field as a float (or other // numeric types that do not 
specify a scale. case _ if scale == -127L => Option(DecimalType(DecimalType.MAX_PRECISION, 10)) - case 1 => Option(BooleanType) - case 3 | 5 | 10 => Option(IntegerType) - case 19 if scale == 0L => Option(LongType) - case 19 if scale == 4L => Option(FloatType) case _ => None } } else { From 96c04f1edcd53798d9db5a356482248868a0a905 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Sat, 24 Jun 2017 13:23:43 +0800 Subject: [PATCH 1041/1204] [SPARK-21159][CORE] Don't try to connect to launcher in standalone cluster mode. Monitoring for standalone cluster mode is not implemented (see SPARK-11033), but the same scheduler implementation is used, and if it tries to connect to the launcher it will fail. So fix the scheduler so it only tries that in client mode; cluster mode applications will be correctly launched and will work, but monitoring through the launcher handle will not be available. Tested by running a cluster mode app with "SparkLauncher.startApplication". Author: Marcelo Vanzin Closes #18397 from vanzin/SPARK-21159. (cherry picked from commit bfd73a7c48b87456d1b84d826e04eca938a1be64) Signed-off-by: Wenchen Fan --- .../scheduler/cluster/StandaloneSchedulerBackend.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 0529fe9eed4da..22ca14fab85bc 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -58,7 +58,13 @@ private[spark] class StandaloneSchedulerBackend( override def start() { super.start() - launcherBackend.connect() + + // SPARK-21159. The scheduler backend should only try to connect to the launcher when in client + // mode. In cluster mode, the code that submits the application to the Master needs to connect + // to the launcher instead. + if (sc.deployMode == "client") { + launcherBackend.connect() + } // The endpoint for executors to talk to us val driverUrl = RpcEndpointAddress( From 6750db3ffdc17c72ae5ef6bc6b6b6af444d7228b Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Sat, 24 Jun 2017 13:23:43 +0800 Subject: [PATCH 1042/1204] [SPARK-21159][CORE] Don't try to connect to launcher in standalone cluster mode. Monitoring for standalone cluster mode is not implemented (see SPARK-11033), but the same scheduler implementation is used, and if it tries to connect to the launcher it will fail. So fix the scheduler so it only tries that in client mode; cluster mode applications will be correctly launched and will work, but monitoring through the launcher handle will not be available. Tested by running a cluster mode app with "SparkLauncher.startApplication". Author: Marcelo Vanzin Closes #18397 from vanzin/SPARK-21159. 
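For illustration, a minimal sketch of driving a standalone cluster-mode application through `SparkLauncher.startApplication`, as in the test described above; the jar path, main class, and master URL below are placeholders, not values from this patch:

```scala
import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

// Placeholders: substitute a real application jar, main class, and master URL.
val handle: SparkAppHandle = new SparkLauncher()
  .setAppResource("/path/to/app.jar")
  .setMainClass("com.example.MyApp")
  .setMaster("spark://master-host:7077")
  .setDeployMode("cluster")
  .startApplication()

// With this fix the application is launched and runs normally; because the
// scheduler backend no longer connects back to the launcher in cluster mode,
// state updates via `handle` are not reported (see SPARK-11033).
println(handle.getState)
```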
(cherry picked from commit bfd73a7c48b87456d1b84d826e04eca938a1be64) Signed-off-by: Wenchen Fan --- .../scheduler/cluster/StandaloneSchedulerBackend.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 6f75a4791e948..70a15ed129424 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -58,7 +58,13 @@ private[spark] class StandaloneSchedulerBackend( override def start() { super.start() - launcherBackend.connect() + + // SPARK-21159. The scheduler backend should only try to connect to the launcher when in client + // mode. In cluster mode, the code that submits the application to the Master needs to connect + // to the launcher instead. + if (sc.deployMode == "client") { + launcherBackend.connect() + } // The endpoint for executors to talk to us val driverUrl = RpcEndpointAddress( From ad44ab5cb9cdaff836c7469d10b00a86a3e46adf Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 24 Jun 2017 22:35:59 +0800 Subject: [PATCH 1043/1204] [SPARK-21203][SQL] Fix wrong results of insertion of Array of Struct ### What changes were proposed in this pull request? ```SQL CREATE TABLE `tab1` (`custom_fields` ARRAY>) USING parquet INSERT INTO `tab1` SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b')) SELECT custom_fields.id, custom_fields.value FROM tab1 ``` The above query always return the last struct of the array, because the rule `SimplifyCasts` incorrectly rewrites the query. The underlying cause is we always use the same `GenericInternalRow` object when doing the cast. ### How was this patch tested? Author: gatorsmile Closes #18412 from gatorsmile/castStruct. (cherry picked from commit 2e1586f60a77ea0adb6f3f68ba74323f0c242199) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 4 ++-- .../spark/sql/sources/InsertSuite.scala | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index a53ef426f79b5..43df19ba009a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -482,15 +482,15 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String case (fromField, toField) => cast(fromField.dataType, toField.dataType) } // TODO: Could be faster? 
- val newRow = new GenericInternalRow(from.fields.length) buildCast[InternalRow](_, row => { + val newRow = new GenericInternalRow(from.fields.length) var i = 0 while (i < row.numFields) { newRow.update(i, if (row.isNullAt(i)) null else castFuncs(i)(row.get(i, from.apply(i).dataType))) i += 1 } - newRow.copy() + newRow }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 2eae66dda88de..41abff2a5da25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -345,4 +345,25 @@ class InsertSuite extends DataSourceTest with SharedSQLContext { ) } } + + test("SPARK-21203 wrong results of insertion of Array of Struct") { + val tabName = "tab1" + withTable(tabName) { + spark.sql( + """ + |CREATE TABLE `tab1` + |(`custom_fields` ARRAY>) + |USING parquet + """.stripMargin) + spark.sql( + """ + |INSERT INTO `tab1` + |SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b')) + """.stripMargin) + + checkAnswer( + spark.sql("SELECT custom_fields.id, custom_fields.value FROM tab1"), + Row(Array(1, 2), Array("a", "b"))) + } + } } From 0d6b701e476148b414089830a6be0d804136f73f Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 24 Jun 2017 22:35:59 +0800 Subject: [PATCH 1044/1204] [SPARK-21203][SQL] Fix wrong results of insertion of Array of Struct ### What changes were proposed in this pull request? ```SQL CREATE TABLE `tab1` (`custom_fields` ARRAY>) USING parquet INSERT INTO `tab1` SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b')) SELECT custom_fields.id, custom_fields.value FROM tab1 ``` The above query always return the last struct of the array, because the rule `SimplifyCasts` incorrectly rewrites the query. The underlying cause is we always use the same `GenericInternalRow` object when doing the cast. ### How was this patch tested? Author: gatorsmile Closes #18412 from gatorsmile/castStruct. (cherry picked from commit 2e1586f60a77ea0adb6f3f68ba74323f0c242199) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 4 ++-- .../spark/sql/sources/InsertSuite.scala | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index f15ae3255ca98..6afe1fa7df12d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -407,15 +407,15 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w case (fromField, toField) => cast(fromField.dataType, toField.dataType) } // TODO: Could be faster? 
- val newRow = new GenericInternalRow(from.fields.length) buildCast[InternalRow](_, row => { + val newRow = new GenericInternalRow(from.fields.length) var i = 0 while (i < row.numFields) { newRow.update(i, if (row.isNullAt(i)) null else castFuncs(i)(row.get(i, from.apply(i).dataType))) i += 1 } - newRow.copy() + newRow }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 4a85b5975ea53..f00e3c2171d37 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -344,4 +344,25 @@ class InsertSuite extends DataSourceTest with SharedSQLContext { ) } } + + test("SPARK-21203 wrong results of insertion of Array of Struct") { + val tabName = "tab1" + withTable(tabName) { + spark.sql( + """ + |CREATE TABLE `tab1` + |(`custom_fields` ARRAY>) + |USING parquet + """.stripMargin) + spark.sql( + """ + |INSERT INTO `tab1` + |SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b')) + """.stripMargin) + + checkAnswer( + spark.sql("SELECT custom_fields.id, custom_fields.value FROM tab1"), + Row(Array(1, 2), Array("a", "b"))) + } + } } From d8e3a4af36f85455548e82ae4acd525f5e52f322 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Sat, 24 Jun 2017 22:49:35 -0700 Subject: [PATCH 1045/1204] [SPARK-21079][SQL] Calculate total size of a partition table as a sum of individual partitions ## What changes were proposed in this pull request? Storage URI of a partitioned table may or may not point to a directory under which individual partitions are stored. In fact, individual partitions may be located in totally unrelated directories. Before this change, ANALYZE TABLE table COMPUTE STATISTICS command calculated total size of a table by adding up sizes of files found under table's storage URI. This calculation could produce 0 if partitions are stored elsewhere. This change uses storage URIs of individual partitions to calculate the sizes of all partitions of a table and adds these up to produce the total size of a table. CC: wzhfy ## How was this patch tested? Added unit test. Ran ANALYZE TABLE xxx COMPUTE STATISTICS on a partitioned Hive table and verified that sizeInBytes is calculated correctly. Before this change, the size would be zero. Author: Masha Basmanova Closes #18309 from mbasmanova/mbasmanova-analyze-part-table. 
(cherry picked from commit b449a1d6aa322a50cf221cd7a2ae85a91d6c7e9f) Signed-off-by: gatorsmile --- .../command/AnalyzeTableCommand.scala | 29 ++++++-- .../spark/sql/hive/StatisticsSuite.scala | 72 +++++++++++++++++++ 2 files changed, 95 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala index d2ea0cdf61aa6..0f3c69c930acb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.command +import java.net.URI + import scala.util.control.NonFatal import org.apache.hadoop.fs.{FileSystem, Path} @@ -81,6 +83,21 @@ case class AnalyzeTableCommand( object AnalyzeTableCommand extends Logging { def calculateTotalSize(sessionState: SessionState, catalogTable: CatalogTable): Long = { + if (catalogTable.partitionColumnNames.isEmpty) { + calculateLocationSize(sessionState, catalogTable.identifier, catalogTable.storage.locationUri) + } else { + // Calculate table size as a sum of the visible partitions. See SPARK-21079 + val partitions = sessionState.catalog.listPartitions(catalogTable.identifier) + partitions.map(p => + calculateLocationSize(sessionState, catalogTable.identifier, p.storage.locationUri) + ).sum + } + } + + private def calculateLocationSize( + sessionState: SessionState, + tableId: TableIdentifier, + locationUri: Option[URI]): Long = { // This method is mainly based on // org.apache.hadoop.hive.ql.stats.StatsUtils.getFileSizeForTable(HiveConf, Table) // in Hive 0.13 (except that we do not use fs.getContentSummary). @@ -91,13 +108,13 @@ object AnalyzeTableCommand extends Logging { // countFileSize to count the table size. 
val stagingDir = sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") - def calculateTableSize(fs: FileSystem, path: Path): Long = { + def calculateLocationSize(fs: FileSystem, path: Path): Long = { val fileStatus = fs.getFileStatus(path) val size = if (fileStatus.isDirectory) { fs.listStatus(path) .map { status => if (!status.getPath.getName.startsWith(stagingDir)) { - calculateTableSize(fs, status.getPath) + calculateLocationSize(fs, status.getPath) } else { 0L } @@ -109,16 +126,16 @@ object AnalyzeTableCommand extends Logging { size } - catalogTable.storage.locationUri.map { p => + locationUri.map { p => val path = new Path(p) try { val fs = path.getFileSystem(sessionState.newHadoopConf()) - calculateTableSize(fs, path) + calculateLocationSize(fs, path) } catch { case NonFatal(e) => logWarning( - s"Failed to get the size of table ${catalogTable.identifier.table} in the " + - s"database ${catalogTable.identifier.database} because of ${e.toString}", e) + s"Failed to get the size of table ${tableId.table} in the " + + s"database ${tableId.database} because of ${e.toString}", e) 0L } }.getOrElse(0L) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 3191b9975fbf9..b03d69e8254cd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton { @@ -125,6 +126,77 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto TableIdentifier("tempTable"), ignoreIfNotExists = true, purge = false) } + test("SPARK-21079 - analyze table with location different than that of individual partitions") { + def queryTotalSize(tableName: String): BigInt = + spark.table(tableName).queryExecution.analyzed.stats(conf).sizeInBytes + + val tableName = "analyzeTable_part" + withTable(tableName) { + withTempPath { path => + sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + + val partitionDates = List("2010-01-01", "2010-01-02", "2010-01-03") + partitionDates.foreach { ds => + sql(s"INSERT INTO TABLE $tableName PARTITION (ds='$ds') SELECT * FROM src") + } + + sql(s"ALTER TABLE $tableName SET LOCATION '$path'") + + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") + + assert(queryTotalSize(tableName) === BigInt(17436)) + } + } + } + + test("SPARK-21079 - analyze partitioned table with only a subset of partitions visible") { + def queryTotalSize(tableName: String): BigInt = + spark.table(tableName).queryExecution.analyzed.stats(conf).sizeInBytes + + val sourceTableName = "analyzeTable_part" + val tableName = "analyzeTable_part_vis" + withTable(sourceTableName, tableName) { + withTempPath { path => + // Create a table with 3 partitions all located under a single top-level directory 'path' + sql( + s""" + |CREATE TABLE $sourceTableName (key STRING, value STRING) + |PARTITIONED BY (ds STRING) + |LOCATION '$path' + """.stripMargin) + + val partitionDates = List("2010-01-01", "2010-01-02", "2010-01-03") + partitionDates.foreach { ds => + sql( + s""" + |INSERT INTO TABLE $sourceTableName PARTITION (ds='$ds') + 
|SELECT * FROM src + """.stripMargin) + } + + // Create another table referring to the same location + sql( + s""" + |CREATE TABLE $tableName (key STRING, value STRING) + |PARTITIONED BY (ds STRING) + |LOCATION '$path' + """.stripMargin) + + // Register only one of the partitions found on disk + val ds = partitionDates.head + sql(s"ALTER TABLE $tableName ADD PARTITION (ds='$ds')").collect() + + // Analyze original table - expect 3 partitions + sql(s"ANALYZE TABLE $sourceTableName COMPUTE STATISTICS noscan") + assert(queryTotalSize(sourceTableName) === BigInt(3 * 5812)) + + // Analyze partial-copy table - expect only 1 partition + sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") + assert(queryTotalSize(tableName) === BigInt(5812)) + } + } + } + test("analyzing views is not supported") { def assertAnalyzeUnsupported(analyzeCommand: String): Unit = { val err = intercept[AnalysisException] { From 26f4f340ce51815b77af8bab0ea077928438a5d9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 25 Jun 2017 14:22:16 +0800 Subject: [PATCH 1046/1204] Revert "[SPARK-18016][SQL][CATALYST][BRANCH-2.1] Code Generation: Constant Pool Limit - Class Splitting" This reverts commit 6b37c863848d5991821aa2c3233754ca3061f4d6. --- sql/catalyst/pom.xml | 7 - .../sql/catalyst/expressions/ScalaUDF.scala | 6 +- .../expressions/codegen/CodeGenerator.scala | 159 ++++-------------- .../codegen/GenerateMutableProjection.scala | 17 +- .../codegen/GenerateOrdering.scala | 3 - .../codegen/GeneratePredicate.scala | 3 - .../codegen/GenerateSafeProjection.scala | 9 +- .../codegen/GenerateUnsafeProjection.scala | 9 +- .../expressions/complexTypeCreator.scala | 18 +- .../expressions/conditionalExpressions.scala | 4 +- .../expressions/objects/objects.scala | 2 +- .../codegen/GeneratedProjectionSuite.scala | 72 ++------ .../sql/execution/DataSourceScanExec.scala | 6 +- .../apache/spark/sql/execution/SortExec.scala | 4 +- .../sql/execution/WholeStageCodegenExec.scala | 3 - .../aggregate/HashAggregateExec.scala | 8 +- .../execution/basicPhysicalOperators.scala | 10 +- .../columnar/GenerateColumnAccessor.scala | 19 +-- .../execution/joins/SortMergeJoinExec.scala | 2 +- .../apache/spark/sql/execution/limit.scala | 2 +- 20 files changed, 96 insertions(+), 267 deletions(-) diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 66c0ff09ea4a2..4b4a8eb3815e1 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -126,13 +126,6 @@ - - org.scalatest - scalatest-maven-plugin - - -Xmx4g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m - - org.antlr antlr4-maven-plugin diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 5c68f9ffc691c..228f4b756c8b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -988,7 +988,7 @@ case class ScalaUDF( val converterTerm = ctx.freshName("converter") val expressionIdx = ctx.references.size - 1 ctx.addMutableState(converterClassName, converterTerm, - s"$converterTerm = ($converterClassName)$typeConvertersClassName" + + s"this.$converterTerm = ($converterClassName)$typeConvertersClassName" + s".createToScalaConverter(((${expressionClassName})((($scalaUDFClassName)" + s"references[$expressionIdx]).getChildren().apply($index))).dataType());") converterTerm @@ -1005,7 +1005,7 @@ case class 
ScalaUDF( // Generate codes used to convert the returned value of user-defined functions to Catalyst type val catalystConverterTerm = ctx.freshName("catalystConverter") ctx.addMutableState(converterClassName, catalystConverterTerm, - s"$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + + s"this.$catalystConverterTerm = ($converterClassName)$typeConvertersClassName" + s".createToCatalystConverter($scalaUDF.dataType());") val resultTerm = ctx.freshName("result") @@ -1019,7 +1019,7 @@ case class ScalaUDF( val funcTerm = ctx.freshName("udf") ctx.addMutableState(funcClassName, funcTerm, - s"$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") + s"this.$funcTerm = ($funcClassName)$scalaUDF.userDefinedFunc();") // codegen for children expressions val evals = children.map(_.genCode(ctx)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 22ce3f7e7c52e..683b9cbb343c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -109,7 +109,7 @@ class CodegenContext { val idx = references.length references += obj val clsName = Option(className).getOrElse(obj.getClass.getName) - addMutableState(clsName, term, s"$term = ($clsName) references[$idx];") + addMutableState(clsName, term, s"this.$term = ($clsName) references[$idx];") term } @@ -198,139 +198,41 @@ class CodegenContext { partitionInitializationStatements.mkString("\n") } - /** - * Holds expressions that are equivalent. Used to perform subexpression elimination - * during codegen. - * - * For expressions that appear more than once, generate additional code to prevent - * recomputing the value. - * - * For example, consider two expression generated from this SQL statement: - * SELECT (col1 + col2), (col1 + col2) / col3. - * - * equivalentExpressions will match the tree containing `col1 + col2` and it will only - * be evaluated once. - */ - val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions - - // Foreach expression that is participating in subexpression elimination, the state to use. - val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState] - - // The collection of sub-expression result resetting methods that need to be called on each row. - val subexprFunctions = mutable.ArrayBuffer.empty[String] - - private val outerClassName = "OuterClass" - /** - * Holds the class and instance names to be generated, where `OuterClass` is a placeholder - * standing for whichever class is generated as the outermost class and which will contain any - * nested sub-classes. All other classes and instance names in this list will represent private, - * nested sub-classes. + * Holding all the functions those will be added into generated class. */ - private val classes: mutable.ListBuffer[(String, String)] = - mutable.ListBuffer[(String, String)](outerClassName -> null) - - // A map holding the current size in bytes of each class to be generated. - private val classSize: mutable.Map[String, Int] = - mutable.Map[String, Int](outerClassName -> 0) - - // Nested maps holding function names and their code belonging to each class. 
- private val classFunctions: mutable.Map[String, mutable.Map[String, String]] = - mutable.Map(outerClassName -> mutable.Map.empty[String, String]) + val addedFunctions: mutable.Map[String, String] = + mutable.Map.empty[String, String] - // Returns the size of the most recently added class. - private def currClassSize(): Int = classSize(classes.head._1) - - // Returns the class name and instance name for the most recently added class. - private def currClass(): (String, String) = classes.head - - // Adds a new class. Requires the class' name, and its instance name. - private def addClass(className: String, classInstance: String): Unit = { - classes.prepend(className -> classInstance) - classSize += className -> 0 - classFunctions += className -> mutable.Map.empty[String, String] + def addNewFunction(funcName: String, funcCode: String): Unit = { + addedFunctions += ((funcName, funcCode)) } /** - * Adds a function to the generated class. If the code for the `OuterClass` grows too large, the - * function will be inlined into a new private, nested class, and a instance-qualified name for - * the function will be returned. Otherwise, the function will be inlined to the `OuterClass` the - * simple `funcName` will be returned. + * Holds expressions that are equivalent. Used to perform subexpression elimination + * during codegen. + * + * For expressions that appear more than once, generate additional code to prevent + * recomputing the value. * - * @param funcName the class-unqualified name of the function - * @param funcCode the body of the function - * @param inlineToOuterClass whether the given code must be inlined to the `OuterClass`. This - * can be necessary when a function is declared outside of the context - * it is eventually referenced and a returned qualified function name - * cannot otherwise be accessed. - * @return the name of the function, qualified by class if it will be inlined to a private, - * nested sub-class + * For example, consider two expression generated from this SQL statement: + * SELECT (col1 + col2), (col1 + col2) / col3. + * + * equivalentExpressions will match the tree containing `col1 + col2` and it will only + * be evaluated once. */ - def addNewFunction( - funcName: String, - funcCode: String, - inlineToOuterClass: Boolean = false): String = { - // The number of named constants that can exist in the class is limited by the Constant Pool - // limit, 65,536. We cannot know how many constants will be inserted for a class, so we use a - // threshold of 1600k bytes to determine when a function should be inlined to a private, nested - // sub-class. - val (className, classInstance) = if (inlineToOuterClass) { - outerClassName -> "" - } else if (currClassSize > 1600000) { - val className = freshName("NestedClass") - val classInstance = freshName("nestedClassInstance") - - addClass(className, classInstance) - - className -> classInstance - } else { - currClass() - } - - classSize(className) += funcCode.length - classFunctions(className) += funcName -> funcCode - - if (className == outerClassName) { - funcName - } else { + val equivalentExpressions: EquivalentExpressions = new EquivalentExpressions - s"$classInstance.$funcName" - } - } + // Foreach expression that is participating in subexpression elimination, the state to use. 
+ val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState] - /** - * Instantiates all nested, private sub-classes as objects to the `OuterClass` - */ - private[sql] def initNestedClasses(): String = { - // Nested, private sub-classes have no mutable state (though they do reference the outer class' - // mutable state), so we declare and initialize them inline to the OuterClass. - classes.filter(_._1 != outerClassName).map { - case (className, classInstance) => - s"private $className $classInstance = new $className();" - }.mkString("\n") - } + // The collection of sub-expression result resetting methods that need to be called on each row. + val subexprFunctions = mutable.ArrayBuffer.empty[String] - /** - * Declares all function code that should be inlined to the `OuterClass`. - */ - private[sql] def declareAddedFunctions(): String = { - classFunctions(outerClassName).values.mkString("\n") + def declareAddedFunctions(): String = { + addedFunctions.map { case (funcName, funcCode) => funcCode }.mkString("\n") } - /** - * Declares all nested, private sub-classes and the function code that should be inlined to them. - */ - private[sql] def declareNestedClasses(): String = { - classFunctions.filterKeys(_ != outerClassName).map { - case (className, functions) => - s""" - |private class $className { - | ${functions.values.mkString("\n")} - |} - """.stripMargin - } - }.mkString("\n") - final val JAVA_BOOLEAN = "boolean" final val JAVA_BYTE = "byte" final val JAVA_SHORT = "short" @@ -650,7 +552,8 @@ class CodegenContext { return 0; } """ - s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" + addNewFunction(compareFunc, funcCode) + s"this.$compareFunc($c1, $c2)" case schema: StructType => val comparisons = GenerateOrdering.genComparisons(this, schema) val compareFunc = freshName("compareStruct") @@ -666,7 +569,8 @@ class CodegenContext { return 0; } """ - s"${addNewFunction(compareFunc, funcCode)}($c1, $c2)" + addNewFunction(compareFunc, funcCode) + s"this.$compareFunc($c1, $c2)" case other if other.isInstanceOf[AtomicType] => s"$c1.compare($c2)" case udt: UserDefinedType[_] => genComp(udt.sqlType, c1, c2) case _ => @@ -736,9 +640,7 @@ class CodegenContext { /** * Splits the generated code of expressions into multiple functions, because function has - * 64kb code size limit in JVM. If the class to which the function would be inlined would grow - * beyond 1600kb, we declare a private, nested sub-class, and the function is inlined to it - * instead, because classes have a constant pool limit of 65,536 named values. + * 64kb code size limit in JVM * * @param expressions the codes to evaluate expressions. * @param funcName the split function name base. @@ -783,6 +685,7 @@ class CodegenContext { |} """.stripMargin addNewFunction(name, code) + name } foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) @@ -866,6 +769,8 @@ class CodegenContext { |} """.stripMargin + addNewFunction(fnName, fn) + // Add a state and a mapping of the common subexpressions that are associate with this // state. Adding this expression to subExprEliminationExprMap means it will call `fn` // when it is code generated. This decision should be a cost based one. 
@@ -886,7 +791,7 @@ class CodegenContext { addMutableState(javaType(expr.dataType), value, s"$value = ${defaultValue(expr.dataType)};") - subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);" + subexprFunctions += s"$fnName($INPUT_ROW);" val state = SubExprEliminationState(isNull, value) e.foreach(subExprEliminationExprs.put(_, state)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala index 635766835029b..4d732445544a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala @@ -63,21 +63,21 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP if (e.nullable) { val isNull = s"isNull_$i" val value = s"value_$i" - ctx.addMutableState("boolean", isNull, s"$isNull = true;") + ctx.addMutableState("boolean", isNull, s"this.$isNull = true;") ctx.addMutableState(ctx.javaType(e.dataType), value, - s"$value = ${ctx.defaultValue(e.dataType)};") + s"this.$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - $isNull = ${ev.isNull}; - $value = ${ev.value}; + this.$isNull = ${ev.isNull}; + this.$value = ${ev.value}; """ } else { val value = s"value_$i" ctx.addMutableState(ctx.javaType(e.dataType), value, - s"$value = ${ctx.defaultValue(e.dataType)};") + s"this.$value = ${ctx.defaultValue(e.dataType)};") s""" ${ev.code} - $value = ${ev.value}; + this.$value = ${ev.value}; """ } } @@ -87,7 +87,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP val updates = validExpr.zip(index).map { case (e, i) => - val ev = ExprCode("", s"isNull_$i", s"value_$i") + val ev = ExprCode("", s"this.isNull_$i", s"this.value_$i") ctx.updateColumn("mutableRow", e.dataType, i, ev, e.nullable) } @@ -135,9 +135,6 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], MutableP $allUpdates return mutableRow; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index a31943255b995..f7fc2d54a047b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -179,9 +179,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR $comparisons return 0; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index b400783bb5e55..dcd1ed96a298e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -72,9 +72,6 @@ object GeneratePredicate extends CodeGenerator[Expression, Predicate] { ${eval.code} return 
!${eval.isNull} && ${eval.value}; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index f708aeff2b146..b1cb6edefb852 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -49,7 +49,7 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val output = ctx.freshName("safeRow") val values = ctx.freshName("values") // These expressions could be split into multiple functions - ctx.addMutableState("Object[]", values, s"$values = null;") + ctx.addMutableState("Object[]", values, s"this.$values = null;") val rowClass = classOf[GenericInternalRow].getName @@ -65,10 +65,10 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val allFields = ctx.splitExpressions(tmp, fieldWriters) val code = s""" final InternalRow $tmp = $input; - $values = new Object[${schema.length}]; + this.$values = new Object[${schema.length}]; $allFields final InternalRow $output = new $rowClass($values); - $values = null; + this.$values = null; """ ExprCode(code, "false", output) @@ -184,9 +184,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] $allExpressions return mutableRow; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index febfe3124f2bd..b358102d914bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -82,7 +82,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val rowWriterClass = classOf[UnsafeRowWriter].getName val rowWriter = ctx.freshName("rowWriter") ctx.addMutableState(rowWriterClass, rowWriter, - s"$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") + s"this.$rowWriter = new $rowWriterClass($bufferHolder, ${inputs.length});") val resetWriter = if (isTopLevel) { // For top level row writer, it always writes to the beginning of the global buffer holder, @@ -182,7 +182,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val arrayWriterClass = classOf[UnsafeArrayWriter].getName val arrayWriter = ctx.freshName("arrayWriter") ctx.addMutableState(arrayWriterClass, arrayWriter, - s"$arrayWriter = new $arrayWriterClass();") + s"this.$arrayWriter = new $arrayWriterClass();") val numElements = ctx.freshName("numElements") val index = ctx.freshName("index") val element = ctx.freshName("element") @@ -321,7 +321,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val holder = ctx.freshName("holder") val holderClass = classOf[BufferHolder].getName ctx.addMutableState(holderClass, holder, - s"$holder = new $holderClass($result, ${numVarLenFields * 32});") + s"this.$holder = new $holderClass($result, 
${numVarLenFields * 32});") val resetBufferHolder = if (numVarLenFields == 0) { "" @@ -402,9 +402,6 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro ${eval.code.trim} return ${eval.value}; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 04e32bda6b0d4..3df2ed8be0650 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -58,10 +58,10 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val arrayClass = classOf[GenericArrayData].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"$values = null;") + ctx.addMutableState("Object[]", values, s"this.$values = null;") ev.copy(code = s""" - $values = new Object[${children.size}];""" + + this.$values = new Object[${children.size}];""" + ctx.splitExpressions( ctx.INPUT_ROW, children.zipWithIndex.map { case (e, i) => @@ -76,7 +76,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { }) + s""" final ArrayData ${ev.value} = new $arrayClass($values); - $values = null; + this.$values = null; """, isNull = "false") } @@ -137,8 +137,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression { val mapClass = classOf[ArrayBasedMapData].getName val keyArray = ctx.freshName("keyArray") val valueArray = ctx.freshName("valueArray") - ctx.addMutableState("Object[]", keyArray, s"$keyArray = null;") - ctx.addMutableState("Object[]", valueArray, s"$valueArray = null;") + ctx.addMutableState("Object[]", keyArray, s"this.$keyArray = null;") + ctx.addMutableState("Object[]", valueArray, s"this.$valueArray = null;") val keyData = s"new $arrayClass($keyArray)" val valueData = s"new $arrayClass($valueArray)" @@ -173,8 +173,8 @@ case class CreateMap(children: Seq[Expression]) extends Expression { }) + s""" final MapData ${ev.value} = new $mapClass($keyData, $valueData); - $keyArray = null; - $valueArray = null; + this.$keyArray = null; + this.$valueArray = null; """, isNull = "false") } @@ -296,7 +296,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val rowClass = classOf[GenericInternalRow].getName val values = ctx.freshName("values") - ctx.addMutableState("Object[]", values, s"$values = null;") + ctx.addMutableState("Object[]", values, s"this.$values = null;") ev.copy(code = s""" $values = new Object[${valExprs.size}];""" + @@ -313,7 +313,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends CreateNamedStruc }) + s""" final InternalRow ${ev.value} = new $rowClass($values); - $values = null; + this.$values = null; """, isNull = "false") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 092c5de08df70..bacedec1ae203 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -131,8 +131,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi | $globalValue = ${ev.value}; |} """.stripMargin - val fullFuncName = ctx.addNewFunction(funcName, funcBody) - (fullFuncName, globalIsNull, globalValue) + ctx.addNewFunction(funcName, funcBody) + (funcName, globalIsNull, globalValue) } override def toString: String = s"if ($predicate) $trueValue else $falseValue" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 5009bf8e96e83..256de74d410e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -912,7 +912,7 @@ case class InitializeJavaBean(beanInstance: Expression, setters: Map[String, Exp val code = s""" ${instanceGen.code} - ${javaBeanInstance} = ${instanceGen.value}; + this.${javaBeanInstance} = ${instanceGen.value}; if (!${instanceGen.isNull}) { $initializeCode } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 7bfdf550bc376..b69b74b4240bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -33,10 +33,10 @@ class GeneratedProjectionSuite extends SparkFunSuite { test("generated projections on wider table") { val N = 1000 - val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) + val wideRow1 = new GenericInternalRow((1 to N).toArray[Any]) val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) val wideRow2 = new GenericInternalRow( - (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) + (1 to N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) val schema2 = StructType((1 to N).map(i => StructField("", StringType))) val joined = new JoinedRow(wideRow1, wideRow2) val joinedSchema = StructType(schema1 ++ schema2) @@ -48,12 +48,12 @@ class GeneratedProjectionSuite extends SparkFunSuite { val unsafeProj = UnsafeProjection.create(nestedSchema) val unsafe: UnsafeRow = unsafeProj(nested) (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === unsafe.getInt(i + 2)) + val s = UTF8String.fromString((i + 1).toString) + assert(i + 1 === unsafe.getInt(i + 2)) assert(s === unsafe.getUTF8String(i + 2 + N)) - assert(i === unsafe.getStruct(0, N * 2).getInt(i)) + assert(i + 1 === unsafe.getStruct(0, N * 2).getInt(i)) assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === unsafe.getStruct(1, N * 2).getInt(i)) + assert(i + 1 === unsafe.getStruct(1, N * 2).getInt(i)) assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) } @@ -62,63 +62,13 @@ class GeneratedProjectionSuite extends SparkFunSuite { val result = safeProj(unsafe) // Can't compare GenericInternalRow with JoinedRow directly (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === result.getInt(i + 2)) + val r = i + 1 + val s = UTF8String.fromString((i + 1).toString) + assert(r === 
result.getInt(i + 2)) assert(s === result.getUTF8String(i + 2 + N)) - assert(i === result.getStruct(0, N * 2).getInt(i)) + assert(r === result.getStruct(0, N * 2).getInt(i)) assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === result.getStruct(1, N * 2).getInt(i)) - assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) - } - - // test generated MutableProjection - val exprs = nestedSchema.fields.zipWithIndex.map { case (f, i) => - BoundReference(i, f.dataType, true) - } - val mutableProj = GenerateMutableProjection.generate(exprs) - val row1 = mutableProj(result) - assert(result === row1) - val row2 = mutableProj(result) - assert(result === row2) - } - - test("generated projections on wider table requiring class-splitting") { - val N = 4000 - val wideRow1 = new GenericInternalRow((0 until N).toArray[Any]) - val schema1 = StructType((1 to N).map(i => StructField("", IntegerType))) - val wideRow2 = new GenericInternalRow( - (0 until N).map(i => UTF8String.fromString(i.toString)).toArray[Any]) - val schema2 = StructType((1 to N).map(i => StructField("", StringType))) - val joined = new JoinedRow(wideRow1, wideRow2) - val joinedSchema = StructType(schema1 ++ schema2) - val nested = new JoinedRow(InternalRow(joined, joined), joined) - val nestedSchema = StructType( - Seq(StructField("", joinedSchema), StructField("", joinedSchema)) ++ joinedSchema) - - // test generated UnsafeProjection - val unsafeProj = UnsafeProjection.create(nestedSchema) - val unsafe: UnsafeRow = unsafeProj(nested) - (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === unsafe.getInt(i + 2)) - assert(s === unsafe.getUTF8String(i + 2 + N)) - assert(i === unsafe.getStruct(0, N * 2).getInt(i)) - assert(s === unsafe.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === unsafe.getStruct(1, N * 2).getInt(i)) - assert(s === unsafe.getStruct(1, N * 2).getUTF8String(i + N)) - } - - // test generated SafeProjection - val safeProj = FromUnsafeProjection(nestedSchema) - val result = safeProj(unsafe) - // Can't compare GenericInternalRow with JoinedRow directly - (0 until N).foreach { i => - val s = UTF8String.fromString(i.toString) - assert(i === result.getInt(i + 2)) - assert(s === result.getUTF8String(i + 2 + N)) - assert(i === result.getStruct(0, N * 2).getInt(i)) - assert(s === result.getStruct(0, N * 2).getUTF8String(i + N)) - assert(i === result.getStruct(1, N * 2).getInt(i)) + assert(r === result.getStruct(1, N * 2).getInt(i)) assert(s === result.getStruct(1, N * 2).getUTF8String(i + N)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 0cfdc83573936..b4aed23218357 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -363,7 +363,7 @@ case class FileSourceScanExec( } val nextBatch = ctx.freshName("nextBatch") - val nextBatchFuncName = ctx.addNewFunction(nextBatch, + ctx.addNewFunction(nextBatch, s""" |private void $nextBatch() throws java.io.IOException { | long getBatchStart = System.nanoTime(); @@ -383,7 +383,7 @@ case class FileSourceScanExec( } s""" |if ($batch == null) { - | $nextBatchFuncName(); + | $nextBatch(); |} |while ($batch != null) { | int numRows = $batch.numRows(); @@ -393,7 +393,7 @@ case class FileSourceScanExec( | if (shouldStop()) return; | } | $batch = null; - | $nextBatchFuncName(); + 
| $nextBatch(); |} |$scanTimeMetric.add($scanTimeTotalNs / (1000 * 1000)); |$scanTimeTotalNs = 0; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala index 9d3dbc2571610..cc576bbc4c802 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala @@ -141,7 +141,7 @@ case class SortExec( ctx.addMutableState("scala.collection.Iterator", sortedIterator, "") val addToSorter = ctx.freshName("addToSorter") - val addToSorterFuncName = ctx.addNewFunction(addToSorter, + ctx.addNewFunction(addToSorter, s""" | private void $addToSorter() throws java.io.IOException { | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} @@ -160,7 +160,7 @@ case class SortExec( s""" | if ($needToSort) { | long $spillSizeBefore = $metrics.memoryBytesSpilled(); - | $addToSorterFuncName(); + | $addToSorter(); | $sortedIterator = $sorterVariable.sort(); | $sortTime.add($sorterVariable.getSortTimeNanos() / 1000000); | $peakMemory.add($sorterVariable.getPeakMemoryUsage()); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index f3931b8e47d15..2ead8f6baae6b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -339,9 +339,6 @@ case class WholeStageCodegenExec(child: SparkPlan) extends UnaryExecNode with Co protected void processNext() throws java.io.IOException { ${code.trim} } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} } """.trim diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 1c6d4f8b18fa5..4529ed067e565 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -209,7 +209,7 @@ case class HashAggregateExec( } val doAgg = ctx.freshName("doAggregateWithoutKey") - val doAggFuncName = ctx.addNewFunction(doAgg, + ctx.addNewFunction(doAgg, s""" | private void $doAgg() throws java.io.IOException { | // initialize aggregation buffer @@ -226,7 +226,7 @@ case class HashAggregateExec( | while (!$initAgg) { | $initAgg = true; | long $beforeAgg = System.nanoTime(); - | $doAggFuncName(); + | $doAgg(); | $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); | | // output the result @@ -590,7 +590,7 @@ case class HashAggregateExec( } else "" } - val doAggFuncName = ctx.addNewFunction(doAgg, + ctx.addNewFunction(doAgg, s""" ${generateGenerateCode} private void $doAgg() throws java.io.IOException { @@ -670,7 +670,7 @@ case class HashAggregateExec( if (!$initAgg) { $initAgg = true; long $beforeAgg = System.nanoTime(); - $doAggFuncName(); + $doAgg(); $aggTime.add((System.nanoTime() - $beforeAgg) / 1000000); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 6176e6d55f784..b00223a86d4d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -281,8 +281,10 @@ case class SampleExec( val samplerClass = classOf[PoissonSampler[UnsafeRow]].getName val initSampler = ctx.freshName("initSampler") ctx.copyResult = true + ctx.addMutableState(s"$samplerClass", sampler, + s"$initSampler();") - val initSamplerFuncName = ctx.addNewFunction(initSampler, + ctx.addNewFunction(initSampler, s""" | private void $initSampler() { | $sampler = new $samplerClass($upperBound - $lowerBound, false); @@ -297,8 +299,6 @@ case class SampleExec( | } """.stripMargin.trim) - ctx.addMutableState(s"$samplerClass", sampler, s"$initSamplerFuncName();") - val samplingCount = ctx.freshName("samplingCount") s""" | int $samplingCount = $sampler.sample(); @@ -370,7 +370,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) s"$number > $partitionEnd" } - val initRangeFuncName = ctx.addNewFunction("initRange", + ctx.addNewFunction("initRange", s""" | private void initRange(int idx) { | $BigInt index = $BigInt.valueOf(idx); @@ -409,7 +409,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) | // initialize Range | if (!$initTerm) { | $initTerm = true; - | $initRangeFuncName(partitionIndex); + | initRange(partitionIndex); | } | | while (!$overflow && $checkEnd) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index f4566496fca5a..14024d6c10558 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -128,7 +128,9 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera } else { val groupedAccessorsItr = initializeAccessors.grouped(numberOfStatementsThreshold) val groupedExtractorsItr = extractors.grouped(numberOfStatementsThreshold) - val accessorNames = groupedAccessorsItr.zipWithIndex.map { case (body, i) => + var groupedAccessorsLength = 0 + groupedAccessorsItr.zipWithIndex.foreach { case (body, i) => + groupedAccessorsLength += 1 val funcName = s"accessors$i" val funcCode = s""" |private void $funcName() { @@ -137,7 +139,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - val extractorNames = groupedExtractorsItr.zipWithIndex.map { case (body, i) => + groupedExtractorsItr.zipWithIndex.foreach { case (body, i) => val funcName = s"extractors$i" val funcCode = s""" |private void $funcName() { @@ -146,8 +148,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera """.stripMargin ctx.addNewFunction(funcName, funcCode) } - (accessorNames.map { accessorName => s"$accessorName();" }.mkString("\n"), - extractorNames.map { extractorName => s"$extractorName();" }.mkString("\n")) + ((0 to groupedAccessorsLength - 1).map { i => s"accessors$i();" }.mkString("\n"), + (0 to groupedAccessorsLength - 1).map { i => s"extractors$i();" }.mkString("\n")) } val codeBody = s""" @@ -182,9 +184,9 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera ${ctx.declareMutableStates()} public SpecificColumnarIterator() { - nativeOrder = ByteOrder.nativeOrder(); - buffers = new byte[${columnTypes.length}][]; - mutableRow = new MutableUnsafeRow(rowWriter); + this.nativeOrder = 
ByteOrder.nativeOrder(); + this.buffers = new byte[${columnTypes.length}][]; + this.mutableRow = new MutableUnsafeRow(rowWriter); } public void initialize(Iterator input, DataType[] columnTypes, int[] columnIndexes) { @@ -222,9 +224,6 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera unsafeRow.setTotalSize(bufferHolder.totalSize()); return unsafeRow; } - - ${ctx.initNestedClasses()} - ${ctx.declareNestedClasses()} }""" val code = CodeFormatter.stripOverlappingComments( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index f8e9a91592c0b..89a9b38132732 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -446,7 +446,7 @@ case class SortMergeJoinExec( | } | return false; // unreachable |} - """.stripMargin, inlineToOuterClass = true) + """.stripMargin) (leftRow, matches) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 73a0f8735ed45..757fe2185d302 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -75,7 +75,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport { protected boolean stopEarly() { return $stopEarly; } - """, inlineToOuterClass = true) + """) val countTerm = ctx.freshName("count") ctx.addMutableState("int", countTerm, s"$countTerm = 0;") s""" From 970f68c056ce543068af28df44490d068ec3d15d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 28 Jun 2017 00:57:05 +0800 Subject: [PATCH 1047/1204] [SPARK-19104][SQL] Lambda variables in ExternalMapToCatalyst should be global The issue happens in `ExternalMapToCatalyst`. For example, the following codes create `ExternalMapToCatalyst` to convert Scala Map to catalyst map format. val data = Seq.tabulate(10)(i => NestedData(1, Map("key" -> InnerData("name", i + 100)))) val ds = spark.createDataset(data) The `valueConverter` in `ExternalMapToCatalyst` looks like: if (isnull(lambdavariable(ExternalMapToCatalyst_value52, ExternalMapToCatalyst_value_isNull52, ObjectType(class org.apache.spark.sql.InnerData), true))) null else named_struct(name, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, assertnotnull(lambdavariable(ExternalMapToCatalyst_value52, ExternalMapToCatalyst_value_isNull52, ObjectType(class org.apache.spark.sql.InnerData), true)).name, true), value, assertnotnull(lambdavariable(ExternalMapToCatalyst_value52, ExternalMapToCatalyst_value_isNull52, ObjectType(class org.apache.spark.sql.InnerData), true)).value) There is a `CreateNamedStruct` expression (`named_struct`) to create a row of `InnerData.name` and `InnerData.value` that are referred by `ExternalMapToCatalyst_value52`. Because `ExternalMapToCatalyst_value52` are local variable, when `CreateNamedStruct` splits expressions to individual functions, the local variable can't be accessed anymore. Jenkins tests. Author: Liang-Chi Hsieh Closes #18418 from viirya/SPARK-19104. 
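To make the scoping problem concrete, here is a standalone Scala sketch (illustration only: the class and method names are made up and this is not the generated Java code or any Spark API). Once a long body is split into helper methods, a local value is no longer in scope inside the helpers, so it has to live in a field of the enclosing class, which is what promoting the lambda variables to global mutable state achieves.

```
// Standalone sketch of the scoping issue; nothing here is Spark code.
class SplitIntoHelpers {
  // Plays the role of the promoted "global" lambda variable.
  private var value: AnyRef = _

  def apply(entry: (String, AnyRef)): Array[AnyRef] = {
    value = entry._2        // assign the shared field instead of declaring a local
    buildStruct()           // the split-out helper can still read it
  }

  // A local `val value` declared inside apply() would not be visible here.
  private def buildStruct(): Array[AnyRef] = Array(value)
}
```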
(cherry picked from commit fd8c931a30a084ee981b75aa469fc97dda6cfaa9) Signed-off-by: Wenchen Fan --- .../catalyst/expressions/objects/objects.scala | 18 ++++++++++++------ .../spark/sql/DatasetPrimitiveSuite.scala | 8 ++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 1a202ecf745c9..bedc88e356c26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -729,6 +729,12 @@ case class ExternalMapToCatalyst private( val entry = ctx.freshName("entry") val entries = ctx.freshName("entries") + val keyElementJavaType = ctx.javaType(keyType) + val valueElementJavaType = ctx.javaType(valueType) + ctx.addMutableState(keyElementJavaType, key, "") + ctx.addMutableState("boolean", valueIsNull, "") + ctx.addMutableState(valueElementJavaType, value, "") + val (defineEntries, defineKeyValue) = child.dataType match { case ObjectType(cls) if classOf[java.util.Map[_, _]].isAssignableFrom(cls) => val javaIteratorCls = classOf[java.util.Iterator[_]].getName @@ -740,8 +746,8 @@ case class ExternalMapToCatalyst private( val defineKeyValue = s""" final $javaMapEntryCls $entry = ($javaMapEntryCls) $entries.next(); - ${ctx.javaType(keyType)} $key = (${ctx.boxedType(keyType)}) $entry.getKey(); - ${ctx.javaType(valueType)} $value = (${ctx.boxedType(valueType)}) $entry.getValue(); + $key = (${ctx.boxedType(keyType)}) $entry.getKey(); + $value = (${ctx.boxedType(valueType)}) $entry.getValue(); """ defineEntries -> defineKeyValue @@ -755,17 +761,17 @@ case class ExternalMapToCatalyst private( val defineKeyValue = s""" final $scalaMapEntryCls $entry = ($scalaMapEntryCls) $entries.next(); - ${ctx.javaType(keyType)} $key = (${ctx.boxedType(keyType)}) $entry._1(); - ${ctx.javaType(valueType)} $value = (${ctx.boxedType(valueType)}) $entry._2(); + $key = (${ctx.boxedType(keyType)}) $entry._1(); + $value = (${ctx.boxedType(valueType)}) $entry._2(); """ defineEntries -> defineKeyValue } val valueNullCheck = if (ctx.isPrimitiveType(valueType)) { - s"boolean $valueIsNull = false;" + s"$valueIsNull = false;" } else { - s"boolean $valueIsNull = $value == null;" + s"$valueIsNull = $value == null;" } val arrayCls = classOf[GenericArrayData].getName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index 7e2949ab5aece..212ee1b39adf1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -32,6 +32,9 @@ case class QueueClass(q: Queue[Int]) case class ComplexClass(seq: SeqClass, list: ListClass, queue: QueueClass) +case class InnerData(name: String, value: Int) +case class NestedData(id: Int, param: Map[String, InnerData]) + package object packageobject { case class PackageClass(value: Int) } @@ -268,4 +271,9 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSQLContext { checkDataset(Seq(PackageClass(1)).toDS(), PackageClass(1)) } + test("SPARK-19104: Lambda variables in ExternalMapToCatalyst should be global") { + val data = Seq.tabulate(10)(i => NestedData(1, Map("key" -> InnerData("name", i + 100)))) + val ds = spark.createDataset(data) + 
checkDataset(ds, data: _*) + } } From 8fdc51bc133a3f6b6173e721e6defd8bac43b394 Mon Sep 17 00:00:00 2001 From: Ian Li Date: Tue, 27 Jun 2017 12:41:48 -0700 Subject: [PATCH 1048/1204] treating empty string as null for csd --- .../apache/spark/sql/hive/TableReader.scala | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 4951da8c6aa72..de28025d8e7e0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -134,11 +134,13 @@ class HadoopTableReader( val attrsWithIndex = attributes.zipWithIndex val mutableRow = new SpecificInternalRow(attributes.map(_.dataType)) + val localEmptyStringsAsNulls = emptyStringsAsNulls // for serializability val deserializedHadoopRDD = hadoopRDD.mapPartitions { iter => val hconf = broadcastedHadoopConf.value.value val deserializer = deserializerClass.newInstance() deserializer.initialize(hconf, tableDesc.getProperties) - HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow, deserializer) + HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow, deserializer, + localEmptyStringsAsNulls) } deserializedHadoopRDD @@ -247,6 +249,7 @@ class HadoopTableReader( val tableProperties = relation.tableDesc.getProperties + val localEmptyStringsAsNulls = emptyStringsAsNulls // for serializability createHadoopRdd(tableDesc, inputPathStr, ifc).mapPartitions { iter => val hconf = broadcastedHiveConf.value.value val deserializer = localDeserializer.newInstance() @@ -266,7 +269,7 @@ class HadoopTableReader( // fill the non partition key attributes HadoopTableReader.fillObject(iter, deserializer, nonPartitionKeyAttrs, - mutableRow, tableSerDe) + mutableRow, tableSerDe, localEmptyStringsAsNulls) } }.toSeq @@ -364,6 +367,7 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging { * positions in the output schema * @param mutableRow A reusable `MutableRow` that should be filled * @param tableDeser Table Deserializer + * @param emptyStringsAsNulls whether to treat empty strings as nulls * @return An `Iterator[Row]` transformed from `iterator` */ def fillObject( @@ -371,7 +375,8 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging { rawDeser: Deserializer, nonPartitionKeyAttrs: Seq[(Attribute, Int)], mutableRow: InternalRow, - tableDeser: Deserializer): Iterator[InternalRow] = { + tableDeser: Deserializer, + emptyStringsAsNulls: Boolean): Iterator[InternalRow] = { val soi = if (rawDeser.getObjectInspector.equals(tableDeser.getObjectInspector)) { rawDeser.getObjectInspector.asInstanceOf[StructObjectInspector] @@ -407,9 +412,27 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging { (value: Any, row: InternalRow, ordinal: Int) => row.setFloat(ordinal, oi.get(value)) case oi: DoubleObjectInspector => (value: Any, row: InternalRow, ordinal: Int) => row.setDouble(ordinal, oi.get(value)) + case oi: HiveVarcharObjectInspector if emptyStringsAsNulls => + (value: Any, row: InternalRow, ordinal: Int) => { + val strValue = UTF8String.fromString(oi.getPrimitiveJavaObject(value).getValue) + if (strValue == UTF8String.EMPTY_UTF8) { + row.update(ordinal, null) + } else { + row.update(ordinal, strValue) + } + } case oi: HiveVarcharObjectInspector => (value: Any, row: InternalRow, ordinal: Int) => row.update(ordinal, 
UTF8String.fromString(oi.getPrimitiveJavaObject(value).getValue)) + case oi: StringObjectInspector if emptyStringsAsNulls => + (value: Any, row: InternalRow, ordinal: Int) => { + val strValue = UTF8String.fromString(oi.getPrimitiveJavaObject(value)) + if (strValue == UTF8String.EMPTY_UTF8) { + row.update(ordinal, null) + } else { + row.update(ordinal, strValue) + } + } case oi: HiveCharObjectInspector => (value: Any, row: InternalRow, ordinal: Int) => row.update(ordinal, UTF8String.fromString(oi.getPrimitiveJavaObject(value).getValue)) From f3c40d5c592194bfa9320de52c291415afa6f12f Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Tue, 27 Jun 2017 15:52:02 -0700 Subject: [PATCH 1049/1204] TableNamePreprocessor support --- .../catalyst/catalog/ExternalCatalog.scala | 4 ++ .../catalyst/catalog/InMemoryCatalog.scala | 3 ++ .../sql/catalyst/catalog/interface.scala | 3 ++ .../spark/sql/hive/HiveExternalCatalog.scala | 3 ++ .../spark/sql/hive/HiveMetastoreCatalog.scala | 40 ++++++++++++++++++- 5 files changed, 51 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index cc6606954e420..291b6855cf22b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -271,7 +271,11 @@ abstract class ExternalCatalog { def setTableNamePreprocessor(newTableNamePreprocessor: (String) => String): Unit + def getTableNamePreprocessor: (String) => String + def setHadoopFileSelector(hadoopFileSelector: HadoopFileSelector): Unit def unsetHadoopFileSelector(): Unit + + def findHadoopFileSelector: Option[HadoopFileSelector] } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index dad814fd48024..c4fd7eac7ce91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -601,8 +601,11 @@ class InMemoryCatalog( override def setTableNamePreprocessor(newTableNamePreprocessor: (String) => String): Unit = {} + def getTableNamePreprocessor: (String) => String = identity + override def setHadoopFileSelector(hadoopFileSelector: HadoopFileSelector): Unit = {} override def unsetHadoopFileSelector(): Unit = {} + override def findHadoopFileSelector: Option[HadoopFileSelector] = None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 051fcaa63c7f8..bccac71a12096 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -219,6 +219,9 @@ case class CatalogTable( locationUri, inputFormat, outputFormat, serde, compressed, properties)) } + def withTableName(newName: String): CatalogTable = + copy(identifier = identifier.copy(table = newName)) + override def toString: String = { val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index c46ab0986a447..497379f158f72 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -1093,6 +1093,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat tableNamePreprocessor = newTableNamePreprocessor } + def getTableNamePreprocessor: (String) => String = tableNamePreprocessor + /** * Allows to register a custom way to select files/directories to be included in a table scan * based on the table name. This can be used together with [[setTableNamePreprocessor]] to @@ -1116,6 +1118,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat hadoopFileSelector = None } + override def findHadoopFileSelector: Option[HadoopFileSelector] = hadoopFileSelector } object HiveExternalCatalog { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index f93922073704a..6a63785747ff2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -120,9 +120,12 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log def lookupRelation( tableIdent: TableIdentifier, alias: Option[String]): LogicalPlan = { + val tableNamePreprocessor = sparkSession.sharedState.externalCatalog.getTableNamePreprocessor + val rawTableName = tableIdent.table + val tblNameInMetastore = tableNamePreprocessor(rawTableName) val qualifiedTableName = getQualifiedTableName(tableIdent) val table = sparkSession.sharedState.externalCatalog.getTable( - qualifiedTableName.database, qualifiedTableName.name) + qualifiedTableName.database, tblNameInMetastore).withTableName(qualifiedTableName.name) if (DDLUtils.isDatasourceTable(table)) { val dataSourceTable = cachedDataSourceTables(qualifiedTableName) @@ -272,9 +275,12 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log }) } else { val rootPath = metastoreRelation.hiveQlTable.getDataLocation + val paths: Seq[Path] = if (fileType != "parquet") { Seq(rootPath) } else { + selectParquetLocationDirectories(metastoreRelation.tableName, rootPath) + } withTableCreationLock(tableIdentifier, { val cached = getCached(tableIdentifier, - Seq(rootPath), + paths, metastoreRelation, metastoreSchema, fileFormatClass, @@ -303,6 +309,36 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log result.copy(expectedOutputAttributes = Some(metastoreRelation.output)) } + /** + * Customizing the data directory selection by using hadoopFileSelector. + * + * The value of locationOpt will be returned as single element sequence if + * 1. the hadoopFileSelector is not defined or + * 2. locationOpt is not defined or + * 3. the selected directories are empty. + * + * Otherwise, the non-empty selected directories will be returned. 
+ */ + private[hive] def selectParquetLocationDirectories( + tableName: String, + location: Path): Seq[Path] = { + + val inputPaths: Option[Seq[Path]] = for { + selector <- sparkSession.sharedState.externalCatalog.findHadoopFileSelector + newLocation = new Path(location.toString) + fs = newLocation.getFileSystem(sparkSession.sparkContext.hadoopConfiguration) + selectedPaths <- selector.selectFiles(tableName, fs, newLocation) + selectedDir = for { + selectedPath <- selectedPaths + if selectedPath + .getFileSystem(sparkSession.sparkContext.hadoopConfiguration) + .isDirectory(selectedPath) + } yield selectedPath + if selectedDir.nonEmpty + } yield selectedDir + inputPaths.getOrElse(Seq(location)) + } + private def inferIfNeeded( relation: MetastoreRelation, options: Map[String, String], From 17a04b9000f86c04f492927d4aa3b23d60e2b5b6 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Thu, 29 Jun 2017 09:51:12 +0100 Subject: [PATCH 1050/1204] [SPARK-21210][DOC][ML] Javadoc 8 fixes for ML shared param traits PR #15999 included fixes for doc strings in the ML shared param traits (occurrences of `>` and `>=`). This PR simply uses the HTML-escaped version of the param doc to embed into the Scaladoc, to ensure that when `SharedParamsCodeGen` is run, the generated javadoc will be compliant for Java 8. ## How was this patch tested? Existing tests Author: Nick Pentreath Closes #18420 from MLnick/shared-params-javadoc8. (cherry picked from commit 70085e83d1ee728b23f7df15f570eb8d77f67a7a) Signed-off-by: Sean Owen --- .../apache/spark/ml/param/shared/SharedParamsCodeGen.scala | 5 ++++- .../org/apache/spark/ml/param/shared/sharedParams.scala | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index c94b8b4e9dfda..013817a41baf5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.param.shared import java.io.PrintWriter import scala.reflect.ClassTag +import scala.xml.Utility /** * Code generator for shared params (sharedParams.scala). Run under the Spark folder with @@ -167,6 +168,8 @@ private[shared] object SharedParamsCodeGen { "def" } + val htmlCompliantDoc = Utility.escape(doc) + s""" |/** | * Trait for shared param $name$defaultValueDoc. @@ -174,7 +177,7 @@ private[shared] object SharedParamsCodeGen { |private[ml] trait Has$Name extends Params { | | /** - | * Param for $doc. + | * Param for $htmlCompliantDoc. | * @group ${groupStr(0)} | */ | final val $name: $Param = new $Param(this, "$name", "$doc"$isValid) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index e3e03dfd43dd6..50619607a5054 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -176,7 +176,7 @@ private[ml] trait HasThreshold extends Params { private[ml] trait HasThresholds extends Params { /** - * Param for Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. 
The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold. + * Param for Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold. * @group param */ final val thresholds: DoubleArrayParam = new DoubleArrayParam(this, "thresholds", "Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold", (t: Array[Double]) => t.forall(_ >= 0) && t.count(_ == 0) <= 1) From 20cf51194a70b1bc2446289f4e57e5d287a8e7b9 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Fri, 30 Jun 2017 10:56:48 +0800 Subject: [PATCH 1051/1204] [SPARK-21253][CORE] Fix a bug that StreamCallback may not be notified if network errors happen ## What changes were proposed in this pull request? If a network error happens before processing StreamResponse/StreamFailure events, StreamCallback.onFailure won't be called. This PR fixes `failOutstandingRequests` to also notify outstanding StreamCallbacks. ## How was this patch tested? The new unit tests. Author: Shixiong Zhu Closes #18472 from zsxwing/fix-stream-2. (cherry picked from commit 4996c53949376153f9ebdc74524fed7226968808) Signed-off-by: Wenchen Fan --- .../spark/network/client/TransportClient.java | 2 +- .../client/TransportResponseHandler.java | 38 ++++++++++++++----- .../TransportResponseHandlerSuite.java | 31 ++++++++++++++- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index a6f527c118218..8f354ad78bbaa 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -179,7 +179,7 @@ public void stream(String streamId, StreamCallback callback) { // written to the socket atomically, so that callbacks are called in the right order // when responses arrive. 
synchronized (this) { - handler.addStreamCallback(callback); + handler.addStreamCallback(streamId, callback); channel.writeAndFlush(new StreamRequest(streamId)).addListener(future -> { if (future.isSuccess()) { long timeTaken = System.currentTimeMillis() - startTime; diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java index 41bead546cad6..be9f18203c8e4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java @@ -24,6 +24,8 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicLong; +import scala.Tuple2; + import com.google.common.annotations.VisibleForTesting; import io.netty.channel.Channel; import org.slf4j.Logger; @@ -56,7 +58,7 @@ public class TransportResponseHandler extends MessageHandler { private final Map outstandingRpcs; - private final Queue streamCallbacks; + private final Queue> streamCallbacks; private volatile boolean streamActive; /** Records the time (in system nanoseconds) that the last fetch or RPC request was sent. */ @@ -88,9 +90,9 @@ public void removeRpcRequest(long requestId) { outstandingRpcs.remove(requestId); } - public void addStreamCallback(StreamCallback callback) { + public void addStreamCallback(String streamId, StreamCallback callback) { timeOfLastRequestNs.set(System.nanoTime()); - streamCallbacks.offer(callback); + streamCallbacks.offer(Tuple2.apply(streamId, callback)); } @VisibleForTesting @@ -104,15 +106,31 @@ public void deactivateStream() { */ private void failOutstandingRequests(Throwable cause) { for (Map.Entry entry : outstandingFetches.entrySet()) { - entry.getValue().onFailure(entry.getKey().chunkIndex, cause); + try { + entry.getValue().onFailure(entry.getKey().chunkIndex, cause); + } catch (Exception e) { + logger.warn("ChunkReceivedCallback.onFailure throws exception", e); + } } for (Map.Entry entry : outstandingRpcs.entrySet()) { - entry.getValue().onFailure(cause); + try { + entry.getValue().onFailure(cause); + } catch (Exception e) { + logger.warn("RpcResponseCallback.onFailure throws exception", e); + } + } + for (Tuple2 entry : streamCallbacks) { + try { + entry._2().onFailure(entry._1(), cause); + } catch (Exception e) { + logger.warn("StreamCallback.onFailure throws exception", e); + } } // It's OK if new fetches appear, as they will fail immediately. 
outstandingFetches.clear(); outstandingRpcs.clear(); + streamCallbacks.clear(); } @Override @@ -190,8 +208,9 @@ public void handle(ResponseMessage message) throws Exception { } } else if (message instanceof StreamResponse) { StreamResponse resp = (StreamResponse) message; - StreamCallback callback = streamCallbacks.poll(); - if (callback != null) { + Tuple2 entry = streamCallbacks.poll(); + if (entry != null) { + StreamCallback callback = entry._2(); if (resp.byteCount > 0) { StreamInterceptor interceptor = new StreamInterceptor(this, resp.streamId, resp.byteCount, callback); @@ -216,8 +235,9 @@ public void handle(ResponseMessage message) throws Exception { } } else if (message instanceof StreamFailure) { StreamFailure resp = (StreamFailure) message; - StreamCallback callback = streamCallbacks.poll(); - if (callback != null) { + Tuple2 entry = streamCallbacks.poll(); + if (entry != null) { + StreamCallback callback = entry._2(); try { callback.onFailure(resp.streamId, new RuntimeException(resp.error)); } catch (IOException ioe) { diff --git a/common/network-common/src/test/java/org/apache/spark/network/TransportResponseHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/TransportResponseHandlerSuite.java index 09fc80d12d510..b4032c4c3f031 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/TransportResponseHandlerSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/TransportResponseHandlerSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.network; +import java.io.IOException; import java.nio.ByteBuffer; import io.netty.channel.Channel; @@ -127,7 +128,7 @@ public void testActiveStreams() throws Exception { StreamResponse response = new StreamResponse("stream", 1234L, null); StreamCallback cb = mock(StreamCallback.class); - handler.addStreamCallback(cb); + handler.addStreamCallback("stream", cb); assertEquals(1, handler.numOutstandingRequests()); handler.handle(response); assertEquals(1, handler.numOutstandingRequests()); @@ -135,9 +136,35 @@ public void testActiveStreams() throws Exception { assertEquals(0, handler.numOutstandingRequests()); StreamFailure failure = new StreamFailure("stream", "uh-oh"); - handler.addStreamCallback(cb); + handler.addStreamCallback("stream", cb); assertEquals(1, handler.numOutstandingRequests()); handler.handle(failure); assertEquals(0, handler.numOutstandingRequests()); } + + @Test + public void failOutstandingStreamCallbackOnClose() throws Exception { + Channel c = new LocalChannel(); + c.pipeline().addLast(TransportFrameDecoder.HANDLER_NAME, new TransportFrameDecoder()); + TransportResponseHandler handler = new TransportResponseHandler(c); + + StreamCallback cb = mock(StreamCallback.class); + handler.addStreamCallback("stream-1", cb); + handler.channelInactive(); + + verify(cb).onFailure(eq("stream-1"), isA(IOException.class)); + } + + @Test + public void failOutstandingStreamCallbackOnException() throws Exception { + Channel c = new LocalChannel(); + c.pipeline().addLast(TransportFrameDecoder.HANDLER_NAME, new TransportFrameDecoder()); + TransportResponseHandler handler = new TransportResponseHandler(c); + + StreamCallback cb = mock(StreamCallback.class); + handler.addStreamCallback("stream-1", cb); + handler.exceptionCaught(new IOException("Oops!")); + + verify(cb).onFailure(eq("stream-1"), isA(IOException.class)); + } } From 8de67e3692c70e6401902cd9d9be823e1882da8d Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Fri, 30 Jun 2017 11:02:22 +0800 Subject: [PATCH 1052/1204] 
[SPARK-21253][CORE] Disable spark.reducer.maxReqSizeShuffleToMem Disable spark.reducer.maxReqSizeShuffleToMem because it breaks the old shuffle service. Credits to wangyum Closes #18466 Jenkins Author: Shixiong Zhu Author: Yuming Wang Closes #18467 from zsxwing/SPARK-21253. (cherry picked from commit 80f7ac3a601709dd9471092244612023363f54cd) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/internal/config/package.scala | 3 ++- docs/configuration.md | 8 -------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index f8139b706a7cc..70feea8af8c56 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -289,8 +289,9 @@ package object config { private[spark] val REDUCER_MAX_REQ_SIZE_SHUFFLE_TO_MEM = ConfigBuilder("spark.reducer.maxReqSizeShuffleToMem") + .internal() .doc("The blocks of a shuffle request will be fetched to disk when size of the request is " + "above this threshold. This is to avoid a giant request takes too much memory.") .bytesConf(ByteUnit.BYTE) - .createWithDefaultString("200m") + .createWithDefault(Long.MaxValue) } diff --git a/docs/configuration.md b/docs/configuration.md index 6a00ad14e3bf6..459e944e95e25 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -519,14 +519,6 @@ Apart from these, the following properties are also available, and may be useful By allowing it to limit the number of fetch requests, this scenario can be mitigated. - - spark.reducer.maxReqSizeShuffleToMem - 200m - - The blocks of a shuffle request will be fetched to disk when size of the request is above - this threshold. This is to avoid a giant request takes too much memory. - - spark.shuffle.compress true From c6ba647935e9108d139cc8914091790917567ad7 Mon Sep 17 00:00:00 2001 From: IngoSchuster Date: Fri, 30 Jun 2017 11:16:09 +0800 Subject: [PATCH 1053/1204] [SPARK-21176][WEB UI] Limit number of selector threads for admin ui proxy servlets to 8 ## What changes were proposed in this pull request? Please see also https://issues.apache.org/jira/browse/SPARK-21176 This change limits the number of selector threads that jetty creates to maximum 8 per proxy servlet (Jetty default is number of processors / 2). The newHttpClient for Jettys ProxyServlet class is overwritten to avoid the Jetty defaults (which are designed for high-performance http servers). Once https://github.com/eclipse/jetty.project/issues/1643 is available, the code could be cleaned up to avoid the method override. I really need this on v2.1.1 - what is the best way for a backport automatic merge works fine)? Shall I create another PR? ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) The patch was tested manually on a Spark cluster with a head node that has 88 processors using JMX to verify that the number of selector threads is now limited to 8 per proxy. gurvindersingh zsxwing can you please review the change? Author: IngoSchuster Author: Ingo Schuster Closes #18437 from IngoSchuster/master. 
(cherry picked from commit 88a536babf119b7e331d02aac5d52b57658803bf) Signed-off-by: Wenchen Fan --- .../main/scala/org/apache/spark/ui/JettyUtils.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index edf328b5ae538..b9371c7ad7b45 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -26,6 +26,8 @@ import scala.language.implicitConversions import scala.xml.Node import org.eclipse.jetty.client.api.Response +import org.eclipse.jetty.client.HttpClient +import org.eclipse.jetty.client.http.HttpClientTransportOverHTTP import org.eclipse.jetty.proxy.ProxyServlet import org.eclipse.jetty.server._ import org.eclipse.jetty.server.handler._ @@ -208,6 +210,16 @@ private[spark] object JettyUtils extends Logging { rewrittenURI.toString() } + override def newHttpClient(): HttpClient = { + // SPARK-21176: Use the Jetty logic to calculate the number of selector threads (#CPUs/2), + // but limit it to 8 max. + // Otherwise, it might happen that we exhaust the threadpool since in reverse proxy mode + // a proxy is instantiated for each executor. If the head node has many processors, this + // can quickly add up to an unreasonably high number of threads. + val numSelectors = math.max(1, math.min(8, Runtime.getRuntime().availableProcessors() / 2)) + new HttpClient(new HttpClientTransportOverHTTP(numSelectors), null) + } + override def filterServerResponseHeader( clientRequest: HttpServletRequest, serverResponse: Response, From 083adb07fc78b702bb84fa43373969ca7c5eb29a Mon Sep 17 00:00:00 2001 From: IngoSchuster Date: Fri, 30 Jun 2017 11:16:09 +0800 Subject: [PATCH 1054/1204] [SPARK-21176][WEB UI] Limit number of selector threads for admin ui proxy servlets to 8 ## What changes were proposed in this pull request? Please see also https://issues.apache.org/jira/browse/SPARK-21176 This change limits the number of selector threads that jetty creates to maximum 8 per proxy servlet (Jetty default is number of processors / 2). The newHttpClient for Jettys ProxyServlet class is overwritten to avoid the Jetty defaults (which are designed for high-performance http servers). Once https://github.com/eclipse/jetty.project/issues/1643 is available, the code could be cleaned up to avoid the method override. I really need this on v2.1.1 - what is the best way for a backport automatic merge works fine)? Shall I create another PR? ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) The patch was tested manually on a Spark cluster with a head node that has 88 processors using JMX to verify that the number of selector threads is now limited to 8 per proxy. gurvindersingh zsxwing can you please review the change? Author: IngoSchuster Author: Ingo Schuster Closes #18437 from IngoSchuster/master. 
(cherry picked from commit 88a536babf119b7e331d02aac5d52b57658803bf) Signed-off-by: Wenchen Fan --- .../main/scala/org/apache/spark/ui/JettyUtils.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 639b8577617f6..4d7f76975b606 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -26,6 +26,8 @@ import scala.language.implicitConversions import scala.xml.Node import org.eclipse.jetty.client.api.Response +import org.eclipse.jetty.client.HttpClient +import org.eclipse.jetty.client.http.HttpClientTransportOverHTTP import org.eclipse.jetty.proxy.ProxyServlet import org.eclipse.jetty.server.{HttpConnectionFactory, Request, Server, ServerConnector} import org.eclipse.jetty.server.handler._ @@ -208,6 +210,16 @@ private[spark] object JettyUtils extends Logging { rewrittenURI.toString() } + override def newHttpClient(): HttpClient = { + // SPARK-21176: Use the Jetty logic to calculate the number of selector threads (#CPUs/2), + // but limit it to 8 max. + // Otherwise, it might happen that we exhaust the threadpool since in reverse proxy mode + // a proxy is instantiated for each executor. If the head node has many processors, this + // can quickly add up to an unreasonably high number of threads. + val numSelectors = math.max(1, math.min(8, Runtime.getRuntime().availableProcessors() / 2)) + new HttpClient(new HttpClientTransportOverHTTP(numSelectors), null) + } + override def filterServerResponseHeader( clientRequest: HttpServletRequest, serverResponse: Response, From d16e2620d1fc8a6b6b5c71401e5d452683fa7762 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Thu, 29 Jun 2017 20:56:37 -0700 Subject: [PATCH 1055/1204] [SPARK-21253][CORE][HOTFIX] Fix Scala 2.10 build ## What changes were proposed in this pull request? A follow up PR to fix Scala 2.10 build for #18472 ## How was this patch tested? Jenkins Author: Shixiong Zhu Closes #18478 from zsxwing/SPARK-21253-2. (cherry picked from commit cfc696f4a4289acf132cb26baf7c02c5b6305277) Signed-off-by: Shixiong Zhu --- .../apache/spark/network/client/TransportResponseHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java index be9f18203c8e4..340b8b96aabc6 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java @@ -92,7 +92,7 @@ public void removeRpcRequest(long requestId) { public void addStreamCallback(String streamId, StreamCallback callback) { timeOfLastRequestNs.set(System.nanoTime()); - streamCallbacks.offer(Tuple2.apply(streamId, callback)); + streamCallbacks.offer(new Tuple2<>(streamId, callback)); } @VisibleForTesting From 8b08fd06c0e22e7967c05aee83654b6be446efb4 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Fri, 30 Jun 2017 12:34:09 +0800 Subject: [PATCH 1056/1204] [SPARK-21258][SQL] Fix WindowExec complex object aggregation with spilling ## What changes were proposed in this pull request? 
`WindowExec` currently improperly stores complex objects (UnsafeRow, UnsafeArrayData, UnsafeMapData, UTF8String) during aggregation by keeping a reference in the buffer used by `GeneratedMutableProjections` to the actual input data. Things go wrong when the input object (or the backing bytes) are reused for other things. This could happen in window functions when it starts spilling to disk. When reading the back the spill files the `UnsafeSorterSpillReader` reuses the buffer to which the `UnsafeRow` points, leading to weird corruption scenario's. Note that this only happens for aggregate functions that preserve (parts of) their input, for example `FIRST`, `LAST`, `MIN` & `MAX`. This was not seen before, because the spilling logic was not doing actual spills as much and actually used an in-memory page. This page was not cleaned up during window processing and made sure unsafe objects point to their own dedicated memory location. This was changed by https://github.com/apache/spark/pull/16909, after this PR Spark spills more eagerly. This PR provides a surgical fix because we are close to releasing Spark 2.2. This change just makes sure that there cannot be any object reuse at the expensive of a little bit of performance. We will follow-up with a more subtle solution at a later point. ## How was this patch tested? Added a regression test to `DataFrameWindowFunctionsSuite`. Author: Herman van Hovell Closes #18470 from hvanhovell/SPARK-21258. (cherry picked from commit e2f32ee45ac907f1f53fde7e412676a849a94872) Signed-off-by: Wenchen Fan --- .../execution/window/AggregateProcessor.scala | 7 ++- .../sql/DataFrameWindowFunctionsSuite.scala | 47 ++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala index c9f5d3b3d92d7..dfa1100c37a0a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala @@ -145,10 +145,13 @@ private[window] final class AggregateProcessor( /** Update the buffer. */ def update(input: InternalRow): Unit = { - updateProjection(join(buffer, input)) + // TODO(hvanhovell) this sacrifices performance for correctness. We should make sure that + // MutableProjection makes copies of the complex input objects it buffer. + val copy = input.copy() + updateProjection(join(buffer, copy)) var i = 0 while (i < numImperatives) { - imperatives(i).update(buffer, input) + imperatives(i).update(buffer, copy) i += 1 } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 1255c49104718..204858fa29787 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -19,8 +19,9 @@ package org.apache.spark.sql import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types.{DataType, LongType, StructType} +import org.apache.spark.sql.types._ /** * Window function testing for DataFrame API. 
@@ -423,4 +424,48 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { df.select(selectList: _*).where($"value" < 2), Seq(Row(3, "1", null, 3.0, 4.0, 3.0), Row(5, "1", false, 4.0, 5.0, 5.0))) } + + test("SPARK-21258: complex object in combination with spilling") { + // Make sure we trigger the spilling path. + withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "17") { + val sampleSchema = new StructType(). + add("f0", StringType). + add("f1", LongType). + add("f2", ArrayType(new StructType(). + add("f20", StringType))). + add("f3", ArrayType(new StructType(). + add("f30", StringType))) + + val w0 = Window.partitionBy("f0").orderBy("f1") + val w1 = w0.rowsBetween(Long.MinValue, Long.MaxValue) + + val c0 = first(struct($"f2", $"f3")).over(w0) as "c0" + val c1 = last(struct($"f2", $"f3")).over(w1) as "c1" + + val input = + """{"f1":1497820153720,"f2":[{"f20":"x","f21":0}],"f3":[{"f30":"x","f31":0}]} + |{"f1":1497802179638} + |{"f1":1497802189347} + |{"f1":1497802189593} + |{"f1":1497802189597} + |{"f1":1497802189599} + |{"f1":1497802192103} + |{"f1":1497802193414} + |{"f1":1497802193577} + |{"f1":1497802193709} + |{"f1":1497802202883} + |{"f1":1497802203006} + |{"f1":1497802203743} + |{"f1":1497802203834} + |{"f1":1497802203887} + |{"f1":1497802203893} + |{"f1":1497802203976} + |{"f1":1497820168098} + |""".stripMargin.split("\n").toSeq + + import testImplicits._ + + spark.read.schema(sampleSchema).json(input.toDS()).select(c0, c1).foreach { _ => () } + } + } } From d995dac1cdeec940364453675f59ce5cf2b53684 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Fri, 30 Jun 2017 12:34:09 +0800 Subject: [PATCH 1057/1204] [SPARK-21258][SQL] Fix WindowExec complex object aggregation with spilling ## What changes were proposed in this pull request? `WindowExec` currently improperly stores complex objects (UnsafeRow, UnsafeArrayData, UnsafeMapData, UTF8String) during aggregation by keeping a reference in the buffer used by `GeneratedMutableProjections` to the actual input data. Things go wrong when the input object (or the backing bytes) are reused for other things. This could happen in window functions when it starts spilling to disk. When reading the back the spill files the `UnsafeSorterSpillReader` reuses the buffer to which the `UnsafeRow` points, leading to weird corruption scenario's. Note that this only happens for aggregate functions that preserve (parts of) their input, for example `FIRST`, `LAST`, `MIN` & `MAX`. This was not seen before, because the spilling logic was not doing actual spills as much and actually used an in-memory page. This page was not cleaned up during window processing and made sure unsafe objects point to their own dedicated memory location. This was changed by https://github.com/apache/spark/pull/16909, after this PR Spark spills more eagerly. This PR provides a surgical fix because we are close to releasing Spark 2.2. This change just makes sure that there cannot be any object reuse at the expensive of a little bit of performance. We will follow-up with a more subtle solution at a later point. ## How was this patch tested? Added a regression test to `DataFrameWindowFunctionsSuite`. Author: Herman van Hovell Closes #18470 from hvanhovell/SPARK-21258. 
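The hazard is easiest to see outside Spark. The following plain Scala sketch (not Spark internals; all names are invented) shows a FIRST-style aggregate reading from a producer that mutates one object in place per row, the way a spill reader reuses its buffer, and why copying on update avoids the corruption.

```
// Standalone sketch; nothing here is a Spark API.
object FirstWithReusedInput extends App {
  final class Row(var value: Int) { def copyRow(): Row = new Row(value) }

  val reused = new Row(0)      // one object mutated in place for every input row
  var firstKept: Row = null    // what a FIRST-style aggregate buffers

  for (v <- Seq(1, 2, 3)) {
    reused.value = v
    if (firstKept == null) firstKept = reused.copyRow()  // copy before buffering
  }

  // Buffering `reused` itself instead of a copy would make this 3, not 1.
  assert(firstKept.value == 1)
}
```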
(cherry picked from commit e2f32ee45ac907f1f53fde7e412676a849a94872) Signed-off-by: Wenchen Fan --- .../execution/window/AggregateProcessor.scala | 7 ++- .../sql/DataFrameWindowFunctionsSuite.scala | 47 ++++++++++++++++++- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala index c9f5d3b3d92d7..dfa1100c37a0a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala @@ -145,10 +145,13 @@ private[window] final class AggregateProcessor( /** Update the buffer. */ def update(input: InternalRow): Unit = { - updateProjection(join(buffer, input)) + // TODO(hvanhovell) this sacrifices performance for correctness. We should make sure that + // MutableProjection makes copies of the complex input objects it buffer. + val copy = input.copy() + updateProjection(join(buffer, copy)) var i = 0 while (i < numImperatives) { - imperatives(i).update(buffer, input) + imperatives(i).update(buffer, copy) i += 1 } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 1255c49104718..204858fa29787 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -19,8 +19,9 @@ package org.apache.spark.sql import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types.{DataType, LongType, StructType} +import org.apache.spark.sql.types._ /** * Window function testing for DataFrame API. @@ -423,4 +424,48 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { df.select(selectList: _*).where($"value" < 2), Seq(Row(3, "1", null, 3.0, 4.0, 3.0), Row(5, "1", false, 4.0, 5.0, 5.0))) } + + test("SPARK-21258: complex object in combination with spilling") { + // Make sure we trigger the spilling path. + withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "17") { + val sampleSchema = new StructType(). + add("f0", StringType). + add("f1", LongType). + add("f2", ArrayType(new StructType(). + add("f20", StringType))). + add("f3", ArrayType(new StructType(). 
+ add("f30", StringType))) + + val w0 = Window.partitionBy("f0").orderBy("f1") + val w1 = w0.rowsBetween(Long.MinValue, Long.MaxValue) + + val c0 = first(struct($"f2", $"f3")).over(w0) as "c0" + val c1 = last(struct($"f2", $"f3")).over(w1) as "c1" + + val input = + """{"f1":1497820153720,"f2":[{"f20":"x","f21":0}],"f3":[{"f30":"x","f31":0}]} + |{"f1":1497802179638} + |{"f1":1497802189347} + |{"f1":1497802189593} + |{"f1":1497802189597} + |{"f1":1497802189599} + |{"f1":1497802192103} + |{"f1":1497802193414} + |{"f1":1497802193577} + |{"f1":1497802193709} + |{"f1":1497802202883} + |{"f1":1497802203006} + |{"f1":1497802203743} + |{"f1":1497802203834} + |{"f1":1497802203887} + |{"f1":1497802203893} + |{"f1":1497802203976} + |{"f1":1497820168098} + |""".stripMargin.split("\n").toSeq + + import testImplicits._ + + spark.read.schema(sampleSchema).json(input.toDS()).select(c0, c1).foreach { _ => () } + } + } } From 3ecef249184cdcf74765070d3ea39cf180214976 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 30 Jun 2017 14:45:55 +0800 Subject: [PATCH 1058/1204] Revert "[SPARK-21258][SQL] Fix WindowExec complex object aggregation with spilling" This reverts commit d995dac1cdeec940364453675f59ce5cf2b53684. --- .../execution/window/AggregateProcessor.scala | 7 +-- .../sql/DataFrameWindowFunctionsSuite.scala | 47 +------------------ 2 files changed, 3 insertions(+), 51 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala index dfa1100c37a0a..c9f5d3b3d92d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala @@ -145,13 +145,10 @@ private[window] final class AggregateProcessor( /** Update the buffer. */ def update(input: InternalRow): Unit = { - // TODO(hvanhovell) this sacrifices performance for correctness. We should make sure that - // MutableProjection makes copies of the complex input objects it buffer. - val copy = input.copy() - updateProjection(join(buffer, copy)) + updateProjection(join(buffer, input)) var i = 0 while (i < numImperatives) { - imperatives(i).update(buffer, copy) + imperatives(i).update(buffer, input) i += 1 } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 204858fa29787..1255c49104718 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -19,9 +19,8 @@ package org.apache.spark.sql import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.{DataType, LongType, StructType} /** * Window function testing for DataFrame API. @@ -424,48 +423,4 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext { df.select(selectList: _*).where($"value" < 2), Seq(Row(3, "1", null, 3.0, 4.0, 3.0), Row(5, "1", false, 4.0, 5.0, 5.0))) } - - test("SPARK-21258: complex object in combination with spilling") { - // Make sure we trigger the spilling path. 
- withSQLConf(SQLConf.WINDOW_EXEC_BUFFER_SPILL_THRESHOLD.key -> "17") { - val sampleSchema = new StructType(). - add("f0", StringType). - add("f1", LongType). - add("f2", ArrayType(new StructType(). - add("f20", StringType))). - add("f3", ArrayType(new StructType(). - add("f30", StringType))) - - val w0 = Window.partitionBy("f0").orderBy("f1") - val w1 = w0.rowsBetween(Long.MinValue, Long.MaxValue) - - val c0 = first(struct($"f2", $"f3")).over(w0) as "c0" - val c1 = last(struct($"f2", $"f3")).over(w1) as "c1" - - val input = - """{"f1":1497820153720,"f2":[{"f20":"x","f21":0}],"f3":[{"f30":"x","f31":0}]} - |{"f1":1497802179638} - |{"f1":1497802189347} - |{"f1":1497802189593} - |{"f1":1497802189597} - |{"f1":1497802189599} - |{"f1":1497802192103} - |{"f1":1497802193414} - |{"f1":1497802193577} - |{"f1":1497802193709} - |{"f1":1497802202883} - |{"f1":1497802203006} - |{"f1":1497802203743} - |{"f1":1497802203834} - |{"f1":1497802203887} - |{"f1":1497802203893} - |{"f1":1497802203976} - |{"f1":1497820168098} - |""".stripMargin.split("\n").toSeq - - import testImplicits._ - - spark.read.schema(sampleSchema).json(input.toDS()).select(c0, c1).foreach { _ => () } - } - } } From 29a0be2b3d42bfe991f47725f077892918731e08 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Fri, 30 Jun 2017 14:23:56 -0700 Subject: [PATCH 1059/1204] [SPARK-21129][SQL] Arguments of SQL function call should not be named expressions ### What changes were proposed in this pull request? Function argument should not be named expressions. It could cause two issues: - Misleading error message - Unexpected query results when the column name is `distinct`, which is not a reserved word in our parser. ``` spark-sql> select count(distinct c1, distinct c2) from t1; Error in query: cannot resolve '`distinct`' given input columns: [c1, c2]; line 1 pos 26; 'Project [unresolvedalias('count(c1#30, 'distinct), None)] +- SubqueryAlias t1 +- CatalogRelation `default`.`t1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#30, c2#31] ``` After the fix, the error message becomes ``` spark-sql> select count(distinct c1, distinct c2) from t1; Error in query: extraneous input 'c2' expecting {')', ',', '.', '[', 'OR', 'AND', 'IN', NOT, 'BETWEEN', 'LIKE', RLIKE, 'IS', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}(line 1, pos 35) == SQL == select count(distinct c1, distinct c2) from t1 -----------------------------------^^^ ``` ### How was this patch tested? Added a test case to parser suite. Author: Xiao Li Author: gatorsmile Closes #18338 from gatorsmile/parserDistinctAggFunc. 
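Editor's note: a hypothetical spark-shell fragment (it assumes a `SparkSession` named `spark` and the table `t1` with columns `c1`, `c2` from the examples above; it is not taken from the patch) summarizing how the grammar change plays out:

```scala
// Hypothetical session; assumes a SparkSession `spark` and a table `t1(c1, c2)`.

// STRUCT keeps named arguments through its dedicated grammar rule, so field
// aliases inside struct(...) continue to work:
spark.sql("SELECT struct(c1 AS a, c2 AS b).a FROM t1")

// Ordinary function calls now take plain expressions, so an alias among the
// arguments fails at parse time instead of producing a misleading analysis error:
spark.sql("SELECT count(c1 AS x) FROM t1")   // now fails at parse time (ParseException)
```

The new `STRUCT '(' ... ')'` rule and the switch from `namedExpression` to `expression` in `functionCall` in the diff below are what make this split possible.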
(cherry picked from commit eed9c4ef859fdb75a816a3e0ce2d593b34b23444) Signed-off-by: gatorsmile --- .../spark/sql/catalyst/parser/SqlBase.g4 | 3 +- .../spark/sql/catalyst/dsl/package.scala | 1 + .../sql/catalyst/parser/AstBuilder.scala | 9 +++++- .../parser/ExpressionParserSuite.scala | 6 ++-- .../sql/catalyst/parser/PlanParserSuite.scala | 6 ++++ .../resources/sql-tests/inputs/struct.sql | 7 ++++ .../sql-tests/results/struct.sql.out | 32 ++++++++++++++++++- 7 files changed, 59 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 31b1c67c49931..499f27f00e7e0 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -552,6 +552,7 @@ primaryExpression | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase | CAST '(' expression AS dataType ')' #cast + | STRUCT '(' (argument+=namedExpression (',' argument+=namedExpression)*)? ')' #struct | FIRST '(' expression (IGNORE NULLS)? ')' #first | LAST '(' expression (IGNORE NULLS)? ')' #last | constant #constantDefault @@ -559,7 +560,7 @@ primaryExpression | qualifiedName '.' ASTERISK #star | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor | '(' query ')' #subqueryExpression - | qualifiedName '(' (setQuantifier? namedExpression (',' namedExpression)*)? ')' + | qualifiedName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' (OVER windowSpec)? #functionCall | value=primaryExpression '[' index=valueExpression ']' #subscript | identifier #columnReference diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index beee93d906f0f..85d17afe20230 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -168,6 +168,7 @@ package object dsl { case Seq() => UnresolvedStar(None) case target => UnresolvedStar(Option(target)) } + def namedStruct(e: Expression*): Expression = CreateNamedStruct(e) def callFunction[T, U]( func: T => U, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 10db749026976..d1c9332bee18b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1033,6 +1033,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType)) } + /** + * Create a [[CreateStruct]] expression. + */ + override def visitStruct(ctx: StructContext): Expression = withOrigin(ctx) { + CreateStruct(ctx.argument.asScala.map(expression)) + } + /** * Create a [[First]] expression. */ @@ -1056,7 +1063,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging // Create the function call. 
val name = ctx.qualifiedName.getText val isDistinct = Option(ctx.setQuantifier()).exists(_.DISTINCT != null) - val arguments = ctx.namedExpression().asScala.map(expression) match { + val arguments = ctx.argument.asScala.map(expression) match { case Seq(UnresolvedStar(None)) if name.toLowerCase(Locale.ROOT) == "count" && !isDistinct => // Transform COUNT(*) into COUNT(1). diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 2d9d1f719a571..f06219198bb58 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -226,7 +226,7 @@ class ExpressionParserSuite extends PlanTest { assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b)) assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b)) assertEqual("`select`(all a, b)", 'select.function('a, 'b)) - assertEqual("foo(a as x, b as e)", 'foo.function('a as 'x, 'b as 'e)) + intercept("foo(a x)", "extraneous input 'x'") } test("window function expressions") { @@ -325,7 +325,9 @@ class ExpressionParserSuite extends PlanTest { assertEqual("a.b", UnresolvedAttribute("a.b")) assertEqual("`select`.b", UnresolvedAttribute("select.b")) assertEqual("(a + b).b", ('a + 'b).getField("b")) // This will fail analysis. - assertEqual("struct(a, b).b", 'struct.function('a, 'b).getField("b")) + assertEqual( + "struct(a, b).b", + namedStruct(NamePlaceholder, 'a, NamePlaceholder, 'b).getField("b")) } test("reference") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 77ae843c30561..950f152b94b4d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -223,6 +223,12 @@ class PlanParserSuite extends PlanTest { assertEqual(s"$sql grouping sets((a, b), (a), ())", GroupingSets(Seq(Seq('a, 'b), Seq('a), Seq()), Seq('a, 'b), table("d"), Seq('a, 'b, 'sum.function('c).as("c")))) + + val m = intercept[ParseException] { + parsePlan("SELECT a, b, count(distinct a, distinct b) as c FROM d GROUP BY a, b") + }.getMessage + assert(m.contains("extraneous input 'b'")) + } test("limit") { diff --git a/sql/core/src/test/resources/sql-tests/inputs/struct.sql b/sql/core/src/test/resources/sql-tests/inputs/struct.sql index e56344dc4de80..93a1238ab18c2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/struct.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/struct.sql @@ -18,3 +18,10 @@ SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x; -- Prepend a column to a struct SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x; + +-- Select a column from a struct +SELECT ID, STRUCT(ST.*).C NST FROM tbl_x; +SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x; + +-- Select an alias from a struct +SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/struct.sql.out b/sql/core/src/test/resources/sql-tests/results/struct.sql.out index 3e32f46195464..1da33bc736f0b 100644 --- a/sql/core/src/test/resources/sql-tests/results/struct.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/struct.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 6 +-- Number of queries: 9 -- !query 0 @@ -58,3 +58,33 @@ struct> 1 {"AA":"1","C":"gamma","D":"delta"} 2 {"AA":"2","C":"epsilon","D":"eta"} 3 {"AA":"3","C":"theta","D":"iota"} + + +-- !query 6 +SELECT ID, STRUCT(ST.*).C NST FROM tbl_x +-- !query 6 schema +struct +-- !query 6 output +1 gamma +2 epsilon +3 theta + + +-- !query 7 +SELECT ID, STRUCT(ST.C, ST.D).D NST FROM tbl_x +-- !query 7 schema +struct +-- !query 7 output +1 delta +2 eta +3 iota + + +-- !query 8 +SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x +-- !query 8 schema +struct +-- !query 8 output +1 delta +2 eta +3 iota From a2c7b2133cfee7fa9abfaa2bfbfb637155466783 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 30 Jun 2017 15:54:34 -0700 Subject: [PATCH 1060/1204] Preparing Spark release v2.2.0-rc6 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 39 insertions(+), 39 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index cfa49b94c9526..879c1f80f2c5d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.1 +Version: 2.2.0 Title: R Frontend for Apache Spark Description: The SparkR package provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index da7b0c9d1b933..3a7003f5e94f5 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7577253dd0390..5e9ffd13c61aa 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 558864ae4faab..c3e10d1f289e1 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index de66617d2fa27..e66a8b49de065 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 076d98af834da..1a1f6526ee8e1 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index e74d84a5b3b96..525ece5be4853 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 76783abe36a2c..e7c33264fcdbd 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 254a9b9ac3184..6102f6f45a60c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index b61455e3ef9c8..9d8e192cec9ee 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 2.2.1-SNAPSHOT -SPARK_VERSION_SHORT: 2.2.1 +SPARK_VERSION: 2.2.0 +SPARK_VERSION_SHORT: 2.2.0 SCALA_BINARY_VERSION: "2.11" SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 diff --git a/examples/pom.xml b/examples/pom.xml index 0d001ee478c14..f3d751383c249 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 04afe28fb7885..3f93a33084348 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 47e03419d3df7..7e2d58f1d073a 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index f961a8f54d9a6..26418f9769a25 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index d8bc7dcf75248..58057616174eb 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 6d46430d6e969..27b1bfcfa7dcc 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 5d979ddf2f74c..6bcbb612fef77 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index e4336ecb07da7..2df99403840ee 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 2489d29ebe160..0e93b75f67ca1 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 98f81aee376a0..e17b960c9a5b8 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 88515f853edbc..73852fc4c7656 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 28797e3fe4328..2f761fbcda2d4 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 701455f226094..22fe1dca3343e 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 1ed38a794f44c..df69c5e58727a 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a4bb50ce7dda0..d3cb2dce3fab5 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 16cce0a49653e..996763ad6c256 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index fec1be9099460..af032ed035f97 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/pom.xml b/pom.xml index ccd8546a269c1..7a3be5baea16c 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 pom Spark Project Parent POM http://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index c0bb1968b4b99..e5ec547714d8c 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.1.dev0" +__version__ = "2.2.0" diff --git a/repl/pom.xml b/repl/pom.xml index f3c49dfb00603..2a5d2f4354ecf 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 547836050a610..f94ff4e925e08 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index e00ed33d2ba17..72f891f7c10bd 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5ecee28a1f0b8..722e362943e26 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c9ac366ed6e62..84c82f6b86ef8 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 0c344fa4975e8..ab5593da0d655 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 3dca866307232..f0ef6779a4742 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.1-SNAPSHOT + 2.2.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 604007c6feaa1..bed07015e4540 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index b2e8e469d197c..19b44577ca124 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.1-SNAPSHOT + 2.2.0 ../pom.xml From 85fddf406429dac00ddfb2e6c30870da450455bd Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 30 Jun 2017 15:54:39 -0700 Subject: [PATCH 1061/1204] Preparing development version 2.2.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 39 insertions(+), 39 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 879c1f80f2c5d..cfa49b94c9526 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.0 +Version: 2.2.1 Title: R Frontend for Apache Spark Description: The SparkR package provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 3a7003f5e94f5..da7b0c9d1b933 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 5e9ffd13c61aa..7577253dd0390 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c3e10d1f289e1..558864ae4faab 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index e66a8b49de065..de66617d2fa27 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 1a1f6526ee8e1..076d98af834da 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 525ece5be4853..e74d84a5b3b96 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index e7c33264fcdbd..76783abe36a2c 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 6102f6f45a60c..254a9b9ac3184 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9d8e192cec9ee..b61455e3ef9c8 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 2.2.0 -SPARK_VERSION_SHORT: 2.2.0 +SPARK_VERSION: 2.2.1-SNAPSHOT +SPARK_VERSION_SHORT: 2.2.1 SCALA_BINARY_VERSION: "2.11" SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 diff --git a/examples/pom.xml b/examples/pom.xml index f3d751383c249..0d001ee478c14 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 3f93a33084348..04afe28fb7885 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 7e2d58f1d073a..47e03419d3df7 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 26418f9769a25..f961a8f54d9a6 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 58057616174eb..d8bc7dcf75248 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 27b1bfcfa7dcc..6d46430d6e969 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 6bcbb612fef77..5d979ddf2f74c 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2df99403840ee..e4336ecb07da7 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 0e93b75f67ca1..2489d29ebe160 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index e17b960c9a5b8..98f81aee376a0 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 73852fc4c7656..88515f853edbc 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 2f761fbcda2d4..28797e3fe4328 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 22fe1dca3343e..701455f226094 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index df69c5e58727a..1ed38a794f44c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index d3cb2dce3fab5..a4bb50ce7dda0 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 996763ad6c256..16cce0a49653e 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index af032ed035f97..fec1be9099460 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 7a3be5baea16c..ccd8546a269c1 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index e5ec547714d8c..c0bb1968b4b99 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.0" +__version__ = "2.2.1.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 2a5d2f4354ecf..f3c49dfb00603 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f94ff4e925e08..547836050a610 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 72f891f7c10bd..e00ed33d2ba17 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 722e362943e26..5ecee28a1f0b8 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 84c82f6b86ef8..c9ac366ed6e62 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index ab5593da0d655..0c344fa4975e8 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index f0ef6779a4742..3dca866307232 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 
2.2.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index bed07015e4540..604007c6feaa1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 19b44577ca124..b2e8e469d197c 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.2.1-SNAPSHOT ../pom.xml From 6fd39ea1c9dbf68763cb394a28d8a13c116341df Mon Sep 17 00:00:00 2001 From: Devaraj K Date: Sat, 1 Jul 2017 15:53:49 +0100 Subject: [PATCH 1062/1204] [SPARK-21170][CORE] Utils.tryWithSafeFinallyAndFailureCallbacks throws IllegalArgumentException: Self-suppression not permitted ## What changes were proposed in this pull request? Not adding the exception to the suppressed if it is the same instance as originalThrowable. ## How was this patch tested? Added new tests to verify this, these tests fail without source code changes and passes with the change. Author: Devaraj K Closes #18384 from devaraj-kavali/SPARK-21170. (cherry picked from commit 6beca9ce94f484de2f9ffb946bef8334781b3122) Signed-off-by: Sean Owen --- .../scala/org/apache/spark/util/Utils.scala | 30 +++---- .../org/apache/spark/util/UtilsSuite.scala | 88 ++++++++++++++++++- 2 files changed, 99 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 67497bbba150e..999486c3b6761 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1345,14 +1345,10 @@ private[spark] object Utils extends Logging { try { finallyBlock } catch { - case t: Throwable => - if (originalThrowable != null) { - originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in finally: " + t.getMessage, t) - throw originalThrowable - } else { - throw t - } + case t: Throwable if (originalThrowable != null && originalThrowable != t) => + originalThrowable.addSuppressed(t) + logWarning(s"Suppressing exception in finally: ${t.getMessage}", t) + throw originalThrowable } } } @@ -1384,22 +1380,20 @@ private[spark] object Utils extends Logging { catchBlock } catch { case t: Throwable => - originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in catch: " + t.getMessage, t) + if (originalThrowable != t) { + originalThrowable.addSuppressed(t) + logWarning(s"Suppressing exception in catch: ${t.getMessage}", t) + } } throw originalThrowable } finally { try { finallyBlock } catch { - case t: Throwable => - if (originalThrowable != null) { - originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in finally: " + t.getMessage, t) - throw originalThrowable - } else { - throw t - } + case t: Throwable if (originalThrowable != null && originalThrowable != t) => + originalThrowable.addSuppressed(t) + logWarning(s"Suppressing exception in finally: ${t.getMessage}", t) + throw originalThrowable } } } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 3339d5b35d3b2..d130a1d629987 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -38,7 +38,7 @@ import org.apache.commons.math3.stat.inference.ChiSquareTest import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkConf, 
SparkFunSuite} +import org.apache.spark.{SparkConf, SparkFunSuite, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.network.util.ByteUnit @@ -1025,4 +1025,90 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(redactedConf("spark.sensitive.property") === Utils.REDACTION_REPLACEMENT_TEXT) } + + test("tryWithSafeFinally") { + var e = new Error("Block0") + val finallyBlockError = new Error("Finally Block") + var isErrorOccurred = false + // if the try and finally blocks throw different exception instances + try { + Utils.tryWithSafeFinally { throw e }(finallyBlock = { throw finallyBlockError }) + } catch { + case t: Error => + assert(t.getSuppressed.head == finallyBlockError) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try and finally blocks throw the same exception instance then it should not + // try to add to suppressed and get IllegalArgumentException + e = new Error("Block1") + isErrorOccurred = false + try { + Utils.tryWithSafeFinally { throw e }(finallyBlock = { throw e }) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try throws the exception and finally doesn't throw exception + e = new Error("Block2") + isErrorOccurred = false + try { + Utils.tryWithSafeFinally { throw e }(finallyBlock = {}) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try and finally block don't throw exception + Utils.tryWithSafeFinally {}(finallyBlock = {}) + } + + test("tryWithSafeFinallyAndFailureCallbacks") { + var e = new Error("Block0") + val catchBlockError = new Error("Catch Block") + val finallyBlockError = new Error("Finally Block") + var isErrorOccurred = false + TaskContext.setTaskContext(TaskContext.empty()) + // if the try, catch and finally blocks throw different exception instances + try { + Utils.tryWithSafeFinallyAndFailureCallbacks { throw e }( + catchBlock = { throw catchBlockError }, finallyBlock = { throw finallyBlockError }) + } catch { + case t: Error => + assert(t.getSuppressed.head == catchBlockError) + assert(t.getSuppressed.last == finallyBlockError) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try, catch and finally blocks throw the same exception instance then it should not + // try to add to suppressed and get IllegalArgumentException + e = new Error("Block1") + isErrorOccurred = false + try { + Utils.tryWithSafeFinallyAndFailureCallbacks { throw e }(catchBlock = { throw e }, + finallyBlock = { throw e }) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try throws the exception, catch and finally don't throw exceptions + e = new Error("Block2") + isErrorOccurred = false + try { + Utils.tryWithSafeFinallyAndFailureCallbacks { throw e }(catchBlock = {}, finallyBlock = {}) + } catch { + case t: Error => + assert(t.getSuppressed.length == 0) + isErrorOccurred = true + } + assert(isErrorOccurred) + // if the try, catch and finally blocks don't throw exceptions + Utils.tryWithSafeFinallyAndFailureCallbacks {}(catchBlock = {}, finallyBlock = {}) + TaskContext.unset + } } From db21b679343aba54b240e1d552cf7ec772109f22 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 4 Jul 2017 09:48:40 -0700 Subject: [PATCH 1063/1204] [SPARK-20256][SQL] SessionState should be created more lazily ## What changes were proposed in 
this pull request? `SessionState` is designed to be created lazily. However, in reality, it created immediately in `SparkSession.Builder.getOrCreate` ([here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala#L943)). This PR aims to recover the lazy behavior by keeping the options into `initialSessionOptions`. The benefit is like the following. Users can start `spark-shell` and use RDD operations without any problems. **BEFORE** ```scala $ bin/spark-shell java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder' ... Caused by: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:java.security.AccessControlException: Permission denied: user=spark, access=READ, inode="/apps/hive/warehouse":hive:hdfs:drwx------ ``` As reported in SPARK-20256, this happens when the warehouse directory is not allowed for this user. **AFTER** ```scala $ bin/spark-shell ... Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 2.3.0-SNAPSHOT /_/ Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_112) Type in expressions to have them evaluated. Type :help for more information. scala> sc.range(0, 10, 1).count() res0: Long = 10 ``` ## How was this patch tested? Manual. This closes #18512 . Author: Dongjoon Hyun Closes #18501 from dongjoon-hyun/SPARK-20256. (cherry picked from commit 1b50e0e0d6fd9d1b815a3bb37647ea659222e3f1) Signed-off-by: gatorsmile --- .../scala/org/apache/spark/sql/SparkSession.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index d2bf350711936..cce8a1ceaf726 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -112,6 +112,12 @@ class SparkSession private( existingSharedState.getOrElse(new SharedState(sparkContext)) } + /** + * Initial options for session. This options are applied once when sessionState is created. + */ + @transient + private[sql] val initialSessionOptions = new scala.collection.mutable.HashMap[String, String] + /** * State isolated across sessions, including SQL configurations, temporary tables, registered * functions, and everything else that accepts a [[org.apache.spark.sql.internal.SQLConf]]. @@ -127,9 +133,11 @@ class SparkSession private( parentSessionState .map(_.clone(this)) .getOrElse { - SparkSession.instantiateSessionState( + val state = SparkSession.instantiateSessionState( SparkSession.sessionStateClassName(sparkContext.conf), self) + initialSessionOptions.foreach { case (k, v) => state.conf.setConfString(k, v) } + state } } @@ -935,7 +943,7 @@ object SparkSession { } session = new SparkSession(sparkContext, None, None, extensions) - options.foreach { case (k, v) => session.sessionState.conf.setConfString(k, v) } + options.foreach { case (k, v) => session.initialSessionOptions.put(k, v) } defaultSession.set(session) // Register a successfully instantiated context to the singleton. 
This should be at the From 8f1ca695753ca4692c091791d4cc66b06e176d52 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 5 Jul 2017 10:29:37 +0800 Subject: [PATCH 1064/1204] [SPARK-20256][SQL][BRANCH-2.1] SessionState should be created more lazily ## What changes were proposed in this pull request? `SessionState` is designed to be created lazily. However, in reality, it created immediately in `SparkSession.Builder.getOrCreate` ([here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala#L943)). This PR aims to recover the lazy behavior by keeping the options into `initialSessionOptions`. The benefit is like the following. Users can start `spark-shell` and use RDD operations without any problems. **BEFORE** ```scala $ bin/spark-shell java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder' ... Caused by: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:java.security.AccessControlException: Permission denied: user=spark, access=READ, inode="/apps/hive/warehouse":hive:hdfs:drwx------ ``` As reported in SPARK-20256, this happens when the warehouse directory is not allowed for this user. **AFTER** ```scala $ bin/spark-shell ... Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 2.1.2-SNAPSHOT /_/ Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_131) Type in expressions to have them evaluated. Type :help for more information. scala> sc.range(0, 10, 1).count() res0: Long = 10 ``` ## How was this patch tested? Manual. Author: Dongjoon Hyun Closes #18530 from dongjoon-hyun/SPARK-20256-BRANCH-2.1. --- .../scala/org/apache/spark/sql/SparkSession.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index f3dde480eabe0..f8860691b692f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -101,15 +101,23 @@ class SparkSession private( existingSharedState.getOrElse(new SharedState(sparkContext)) } + /** + * Initial options for session. This options are applied once when sessionState is created. + */ + @transient + private[sql] val initialSessionOptions = new scala.collection.mutable.HashMap[String, String] + /** * State isolated across sessions, including SQL configurations, temporary tables, registered * functions, and everything else that accepts a [[org.apache.spark.sql.internal.SQLConf]]. */ @transient private[sql] lazy val sessionState: SessionState = { - SparkSession.reflect[SessionState, SparkSession]( + val state = SparkSession.reflect[SessionState, SparkSession]( SparkSession.sessionStateClassName(sparkContext.conf), self) + initialSessionOptions.foreach { case (k, v) => state.conf.setConfString(k, v) } + state } /** @@ -875,7 +883,7 @@ object SparkSession { sc } session = new SparkSession(sparkContext) - options.foreach { case (k, v) => session.sessionState.conf.setConfString(k, v) } + options.foreach { case (k, v) => session.initialSessionOptions.put(k, v) } defaultSession.set(session) // Register a successfully instantiated context to the singleton. 
This should be at the From 770fd2a239798d3fa1cb4223d73cfc57413c0bb8 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 5 Jul 2017 11:24:38 +0800 Subject: [PATCH 1065/1204] [SPARK-21300][SQL] ExternalMapToCatalyst should null-check map key prior to converting to internal value. ## What changes were proposed in this pull request? `ExternalMapToCatalyst` should null-check map key prior to converting to internal value to throw an appropriate Exception instead of something like NPE. ## How was this patch tested? Added a test and existing tests. Author: Takuya UESHIN Closes #18524 from ueshin/issues/SPARK-21300. (cherry picked from commit ce10545d3401c555e56a214b7c2f334274803660) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/JavaTypeInference.scala | 1 + .../spark/sql/catalyst/ScalaReflection.scala | 1 + .../catalyst/expressions/objects/objects.scala | 16 +++++++++++++++- .../encoders/ExpressionEncoderSuite.scala | 8 +++++++- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 86a73a319ec3f..2698faef76902 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -423,6 +423,7 @@ object JavaTypeInference { inputObject, ObjectType(keyType.getRawType), serializerFor(_, keyType), + keyNullable = true, ObjectType(valueType.getRawType), serializerFor(_, valueType), valueNullable = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 6d1d019cc4743..c887634adf7bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -511,6 +511,7 @@ object ScalaReflection extends ScalaReflection { inputObject, dataTypeFor(keyType), serializerFor(_, keyType, keyPath, seenTypeSet), + keyNullable = !keyType.typeSymbol.asClass.isPrimitive, dataTypeFor(valueType), serializerFor(_, valueType, valuePath, seenTypeSet), valueNullable = !valueType.typeSymbol.asClass.isPrimitive) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index bedc88e356c26..43cef6ca952c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -659,18 +659,21 @@ object ExternalMapToCatalyst { inputMap: Expression, keyType: DataType, keyConverter: Expression => Expression, + keyNullable: Boolean, valueType: DataType, valueConverter: Expression => Expression, valueNullable: Boolean): ExternalMapToCatalyst = { val id = curId.getAndIncrement() val keyName = "ExternalMapToCatalyst_key" + id + val keyIsNull = "ExternalMapToCatalyst_key_isNull" + id val valueName = "ExternalMapToCatalyst_value" + id val valueIsNull = "ExternalMapToCatalyst_value_isNull" + id ExternalMapToCatalyst( keyName, + keyIsNull, keyType, - keyConverter(LambdaVariable(keyName, "false", keyType, false)), + keyConverter(LambdaVariable(keyName, keyIsNull, keyType, keyNullable)), valueName, valueIsNull, valueType, @@ -686,6 
+689,8 @@ object ExternalMapToCatalyst { * * @param key the name of the map key variable that used when iterate the map, and used as input for * the `keyConverter` + * @param keyIsNull the nullability of the map key variable that used when iterate the map, and + * used as input for the `keyConverter` * @param keyType the data type of the map key variable that used when iterate the map, and used as * input for the `keyConverter` * @param keyConverter A function that take the `key` as input, and converts it to catalyst format. @@ -701,6 +706,7 @@ object ExternalMapToCatalyst { */ case class ExternalMapToCatalyst private( key: String, + keyIsNull: String, keyType: DataType, keyConverter: Expression, value: String, @@ -731,6 +737,7 @@ case class ExternalMapToCatalyst private( val keyElementJavaType = ctx.javaType(keyType) val valueElementJavaType = ctx.javaType(valueType) + ctx.addMutableState("boolean", keyIsNull, "") ctx.addMutableState(keyElementJavaType, key, "") ctx.addMutableState("boolean", valueIsNull, "") ctx.addMutableState(valueElementJavaType, value, "") @@ -768,6 +775,12 @@ case class ExternalMapToCatalyst private( defineEntries -> defineKeyValue } + val keyNullCheck = if (ctx.isPrimitiveType(keyType)) { + s"$keyIsNull = false;" + } else { + s"$keyIsNull = $key == null;" + } + val valueNullCheck = if (ctx.isPrimitiveType(valueType)) { s"$valueIsNull = false;" } else { @@ -790,6 +803,7 @@ case class ExternalMapToCatalyst private( $defineEntries while($entries.hasNext()) { $defineKeyValue + $keyNullCheck $valueNullCheck ${genKeyConverter.code} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 080f11b769388..bb1955a1ae242 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -355,12 +355,18 @@ class ExpressionEncoderSuite extends PlanTest with AnalysisTest { checkNullable[String](true) } - test("null check for map key") { + test("null check for map key: String") { val encoder = ExpressionEncoder[Map[String, Int]]() val e = intercept[RuntimeException](encoder.toRow(Map(("a", 1), (null, 2)))) assert(e.getMessage.contains("Cannot use null as map key")) } + test("null check for map key: Integer") { + val encoder = ExpressionEncoder[Map[Integer, String]]() + val e = intercept[RuntimeException](encoder.toRow(Map((1, "a"), (null, "b")))) + assert(e.getMessage.contains("Cannot use null as map key")) + } + private def encodeDecodeTest[T : ExpressionEncoder]( input: T, testName: String): Unit = { From 6e1081cbeac58826526b6ff7f2938a556b31ca9e Mon Sep 17 00:00:00 2001 From: Sumedh Wale Date: Thu, 6 Jul 2017 14:47:22 +0800 Subject: [PATCH 1066/1204] [SPARK-21312][SQL] correct offsetInBytes in UnsafeRow.writeToStream ## What changes were proposed in this pull request? Corrects offsetInBytes calculation in UnsafeRow.writeToStream. Known failures include writes to some DataSources that have own SparkPlan implementations and cause EXCHANGE in writes. ## How was this patch tested? Extended UnsafeRowSuite.writeToStream to include an UnsafeRow over byte array having non-zero offset. Author: Sumedh Wale Closes #18535 from sumwale/SPARK-21312. 
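Editor's note: the sign error is easiest to see with concrete numbers. A minimal, self-contained check (the constant below is assumed for the demo; this is not Spark code):

```scala
// A row that starts k bytes into its backing byte[] has
//   baseOffset = BYTE_ARRAY_OFFSET + k
// so the index to pass to OutputStream.write(byte[], off, len) is
//   off = baseOffset - BYTE_ARRAY_OFFSET   (== k).
// The pre-fix expression BYTE_ARRAY_OFFSET - baseOffset yields -k, which is only
// correct when k == 0, i.e. for rows that start at the beginning of the array.
object OffsetCheck {
  def main(args: Array[String]): Unit = {
    val BYTE_ARRAY_OFFSET = 16L   // platform-dependent; value assumed for illustration
    val k = 100L                  // row begins 100 bytes into the array
    val baseOffset = BYTE_ARRAY_OFFSET + k

    println(baseOffset - BYTE_ARRAY_OFFSET) //  100 -> fixed code, correct offset
    println(BYTE_ARRAY_OFFSET - baseOffset) // -100 -> old code, wrong unless k == 0
  }
}
```

This is exactly the case the extended test below exercises: it copies the row bytes into a larger array at offset 100 and checks that `writeToStream` still emits the same bytes.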
(cherry picked from commit 14a3bb3a008c302aac908d7deaf0942a98c63be7) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/UnsafeRow.java | 2 +- .../scala/org/apache/spark/sql/UnsafeRowSuite.scala | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 86de90984ca00..56994fafe064b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -550,7 +550,7 @@ public void copyFrom(UnsafeRow row) { */ public void writeToStream(OutputStream out, byte[] writeBuffer) throws IOException { if (baseObject instanceof byte[]) { - int offsetInByteArray = (int) (Platform.BYTE_ARRAY_OFFSET - baseOffset); + int offsetInByteArray = (int) (baseOffset - Platform.BYTE_ARRAY_OFFSET); out.write((byte[]) baseObject, offsetInByteArray, sizeInBytes); } else { int dataRemaining = sizeInBytes; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala index a32763db054f3..a5f904c621e6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala @@ -101,9 +101,22 @@ class UnsafeRowSuite extends SparkFunSuite { MemoryAllocator.UNSAFE.free(offheapRowPage) } } + val (bytesFromArrayBackedRowWithOffset, field0StringFromArrayBackedRowWithOffset) = { + val baos = new ByteArrayOutputStream() + val numBytes = arrayBackedUnsafeRow.getSizeInBytes + val bytesWithOffset = new Array[Byte](numBytes + 100) + System.arraycopy(arrayBackedUnsafeRow.getBaseObject.asInstanceOf[Array[Byte]], 0, + bytesWithOffset, 100, numBytes) + val arrayBackedRow = new UnsafeRow(arrayBackedUnsafeRow.numFields()) + arrayBackedRow.pointTo(bytesWithOffset, Platform.BYTE_ARRAY_OFFSET + 100, numBytes) + arrayBackedRow.writeToStream(baos, null) + (baos.toByteArray, arrayBackedRow.getString(0)) + } assert(bytesFromArrayBackedRow === bytesFromOffheapRow) assert(field0StringFromArrayBackedRow === field0StringFromOffheapRow) + assert(bytesFromArrayBackedRow === bytesFromArrayBackedRowWithOffset) + assert(field0StringFromArrayBackedRow === field0StringFromArrayBackedRowWithOffset) } test("calling getDouble() and getFloat() on null columns") { From 7f7b63bb634c3b89db80cee99848ee94f9dca6ba Mon Sep 17 00:00:00 2001 From: Sumedh Wale Date: Thu, 6 Jul 2017 14:47:22 +0800 Subject: [PATCH 1067/1204] [SPARK-21312][SQL] correct offsetInBytes in UnsafeRow.writeToStream ## What changes were proposed in this pull request? Corrects offsetInBytes calculation in UnsafeRow.writeToStream. Known failures include writes to some DataSources that have own SparkPlan implementations and cause EXCHANGE in writes. ## How was this patch tested? Extended UnsafeRowSuite.writeToStream to include an UnsafeRow over byte array having non-zero offset. Author: Sumedh Wale Closes #18535 from sumwale/SPARK-21312. 
(cherry picked from commit 14a3bb3a008c302aac908d7deaf0942a98c63be7) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/UnsafeRow.java | 2 +- .../scala/org/apache/spark/sql/UnsafeRowSuite.scala | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index d205547698c5b..b8e938881a6d5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -550,7 +550,7 @@ public void copyFrom(UnsafeRow row) { */ public void writeToStream(OutputStream out, byte[] writeBuffer) throws IOException { if (baseObject instanceof byte[]) { - int offsetInByteArray = (int) (Platform.BYTE_ARRAY_OFFSET - baseOffset); + int offsetInByteArray = (int) (baseOffset - Platform.BYTE_ARRAY_OFFSET); out.write((byte[]) baseObject, offsetInByteArray, sizeInBytes); } else { int dataRemaining = sizeInBytes; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala index a32763db054f3..a5f904c621e6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala @@ -101,9 +101,22 @@ class UnsafeRowSuite extends SparkFunSuite { MemoryAllocator.UNSAFE.free(offheapRowPage) } } + val (bytesFromArrayBackedRowWithOffset, field0StringFromArrayBackedRowWithOffset) = { + val baos = new ByteArrayOutputStream() + val numBytes = arrayBackedUnsafeRow.getSizeInBytes + val bytesWithOffset = new Array[Byte](numBytes + 100) + System.arraycopy(arrayBackedUnsafeRow.getBaseObject.asInstanceOf[Array[Byte]], 0, + bytesWithOffset, 100, numBytes) + val arrayBackedRow = new UnsafeRow(arrayBackedUnsafeRow.numFields()) + arrayBackedRow.pointTo(bytesWithOffset, Platform.BYTE_ARRAY_OFFSET + 100, numBytes) + arrayBackedRow.writeToStream(baos, null) + (baos.toByteArray, arrayBackedRow.getString(0)) + } assert(bytesFromArrayBackedRow === bytesFromOffheapRow) assert(field0StringFromArrayBackedRow === field0StringFromOffheapRow) + assert(bytesFromArrayBackedRow === bytesFromArrayBackedRowWithOffset) + assert(field0StringFromArrayBackedRow === field0StringFromArrayBackedRowWithOffset) } test("calling getDouble() and getFloat() on null columns") { From 4e53a4edd72e372583f243c660bbcc0572205716 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 6 Jul 2017 00:20:26 -0700 Subject: [PATCH 1068/1204] [SS][MINOR] Fix flaky test in DatastreamReaderWriterSuite. temp checkpoint dir should be deleted ## What changes were proposed in this pull request? Stopping query while it is being initialized can throw interrupt exception, in which case temporary checkpoint directories will not be deleted, and the test will fail. Author: Tathagata Das Closes #18442 from tdas/DatastreamReaderWriterSuite-fix. 
(cherry picked from commit 60043f22458668ac7ecba94fa78953f23a6bdcec) Signed-off-by: Tathagata Das --- .../spark/sql/streaming/test/DataStreamReaderWriterSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index dc2506a48ad00..bae9d811f7790 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -641,6 +641,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { test("temp checkpoint dir should be deleted if a query is stopped without errors") { import testImplicits._ val query = MemoryStream[Int].toDS.writeStream.format("console").start() + query.processAllAvailable() val checkpointDir = new Path( query.asInstanceOf[StreamingQueryWrapper].streamingQuery.checkpointRoot) val fs = checkpointDir.getFileSystem(spark.sessionState.newHadoopConf()) From 576fd4c3a67b4affc5ac50979e27ae929472f0d9 Mon Sep 17 00:00:00 2001 From: Tathagata Das Date: Thu, 6 Jul 2017 17:28:20 -0700 Subject: [PATCH 1069/1204] [SPARK-21267][SS][DOCS] Update Structured Streaming Documentation ## What changes were proposed in this pull request? Few changes to the Structured Streaming documentation - Clarify that the entire stream input table is not materialized - Add information for Ganglia - Add Kafka Sink to the main docs - Removed a couple of leftover experimental tags - Added more associated reading material and talk videos. In addition, https://github.com/apache/spark/pull/16856 broke the link to the RDD programming guide in several places while renaming the page. This PR fixes those sameeragarwal cloud-fan. - Added a redirection to avoid breaking internal and possible external links. - Removed unnecessary redirection pages that were there since the separate scala, java, and python programming guides were merged together in 2013 or 2014. ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Tathagata Das Closes #18485 from tdas/SPARK-21267. (cherry picked from commit 0217dfd26f89133f146197359b556c9bf5aca172) Signed-off-by: Shixiong Zhu --- docs/_layouts/global.html | 7 +- docs/index.md | 13 +- docs/java-programming-guide.md | 7 - docs/programming-guide.md | 7 + docs/python-programming-guide.md | 7 - docs/rdd-programming-guide.md | 2 +- docs/scala-programming-guide.md | 7 - docs/sql-programming-guide.md | 16 +- .../structured-streaming-programming-guide.md | 172 +++++++++++++++--- .../scala/org/apache/spark/sql/Dataset.scala | 3 - 10 files changed, 169 insertions(+), 72 deletions(-) delete mode 100644 docs/java-programming-guide.md create mode 100644 docs/programming-guide.md delete mode 100644 docs/python-programming-guide.md delete mode 100644 docs/scala-programming-guide.md diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index c00d0db63cd10..570483c0b04ea 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -69,11 +69,10 @@ Programming Guides
    +### Reporting Metrics using Dropwizard +Spark supports reporting metrics using the [Dropwizard Library](monitoring.html#metrics). To enable metrics of Structured Streaming queries to be reported as well, you have to explicitly enable the configuration `spark.sql.streaming.metricsEnabled` in the SparkSession. + +
    +
    +{% highlight scala %} +spark.conf.set("spark.sql.streaming.metricsEnabled", "true") +// or +spark.sql("SET spark.sql.streaming.metricsEnabled=true") +{% endhighlight %} +
    +
    +{% highlight java %} +spark.conf().set("spark.sql.streaming.metricsEnabled", "true"); +// or +spark.sql("SET spark.sql.streaming.metricsEnabled=true"); +{% endhighlight %} +
    +
    +{% highlight python %} +spark.conf.set("spark.sql.streaming.metricsEnabled", "true") +# or +spark.sql("SET spark.sql.streaming.metricsEnabled=true") +{% endhighlight %} +
    +
    +{% highlight r %} +sql("SET spark.sql.streaming.metricsEnabled=true") +{% endhighlight %} +
    +
    + + +All queries started in the SparkSession after this configuration has been enabled will report metrics through Dropwizard to whatever [sinks](monitoring.html#metrics) have been configured (e.g. Ganglia, Graphite, JMX, etc.). + ## Recovering from Failures with Checkpointing In case of a failure or intentional shutdown, you can recover the previous progress and state of a previous query, and continue where it left off. This is done using checkpointing and write ahead logs. You can configure a query with a checkpoint location, and the query will save all the progress information (i.e. range of offsets processed in each trigger) and the running aggregates (e.g. word counts in the [quick example](#quick-example)) to the checkpoint location. This checkpoint location has to be a path in an HDFS compatible file system, and can be set as an option in the DataStreamWriter when [starting a query](#starting-streaming-queries). @@ -1971,8 +2082,23 @@ write.stream(aggDF, "memory", outputMode = "complete", checkpointLocation = "pat
    -# Where to go from here -- Examples: See and run the -[Scala]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/scala/org/apache/spark/examples/sql/streaming)/[Java]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/java/org/apache/spark/examples/sql/streaming)/[Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python/sql/streaming)/[R]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/r/streaming) -examples. +# Additional Information + +**Further Reading** + +- See and run the + [Scala]({{site.SPARK_GITHUB_URL}}/tree/v{{site.SPARK_VERSION_SHORT}}/examples/src/main/scala/org/apache/spark/examples/sql/streaming)/[Java]({{site.SPARK_GITHUB_URL}}/tree/v{{site.SPARK_VERSION_SHORT}}/examples/src/main/java/org/apache/spark/examples/sql/streaming)/[Python]({{site.SPARK_GITHUB_URL}}/tree/v{{site.SPARK_VERSION_SHORT}}/examples/src/main/python/sql/streaming)/[R]({{site.SPARK_GITHUB_URL}}/tree/v{{site.SPARK_VERSION_SHORT}}/examples/src/main/r/streaming) + examples. + - [Instructions](index.html#running-the-examples-and-shell) on how to run Spark examples +- Read about integrating with Kafka in the [Structured Streaming Kafka Integration Guide](structured-streaming-kafka-integration.html) +- Read more details about using DataFrames/Datasets in the [Spark SQL Programming Guide](sql-programming-guide.html) +- Third-party Blog Posts + - [Real-time Streaming ETL with Structured Streaming in Apache Spark 2.1 (Databricks Blog)](https://databricks.com/blog/2017/01/19/real-time-streaming-etl-structured-streaming-apache-spark-2-1.html) + - [Real-Time End-to-End Integration with Apache Kafka in Apache Spark’s Structured Streaming (Databricks Blog)](https://databricks.com/blog/2017/04/04/real-time-end-to-end-integration-with-apache-kafka-in-apache-sparks-structured-streaming.html) + - [Event-time Aggregation and Watermarking in Apache Spark’s Structured Streaming (Databricks Blog)](https://databricks.com/blog/2017/05/08/event-time-aggregation-watermarking-apache-sparks-structured-streaming.html) + +**Talks** + +- Spark Summit 2017 Talk - [Easy, Scalable, Fault-tolerant Stream Processing with Structured Streaming in Apache Spark](https://spark-summit.org/2017/events/easy-scalable-fault-tolerant-stream-processing-with-structured-streaming-in-apache-spark/) - Spark Summit 2016 Talk - [A Deep Dive into Structured Streaming](https://spark-summit.org/2016/events/a-deep-dive-into-structured-streaming/) + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 36588904244c1..65ce77f1cefee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -484,7 +484,6 @@ class Dataset[T] private[sql]( * @group streaming * @since 2.0.0 */ - @Experimental @InterfaceStability.Evolving def isStreaming: Boolean = logicalPlan.isStreaming @@ -545,7 +544,6 @@ class Dataset[T] private[sql]( } /** - * :: Experimental :: * Defines an event time watermark for this [[Dataset]]. A watermark tracks a point in time * before which we assume no more late data is going to arrive. * @@ -569,7 +567,6 @@ class Dataset[T] private[sql]( * @group streaming * @since 2.1.0 */ - @Experimental @InterfaceStability.Evolving // We only accept an existing column name, not a derived column here as a watermark that is // defined on a derived column cannot referenced elsewhere in the plan. 
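The patch above documents `Dataset.withWatermark`, metrics reporting, and checkpoint recovery separately. As an illustrative sketch only (not part of any patch in this series), and assuming a local SparkSession plus the rate source covered by a later patch in this series as a stand-in input with a hypothetical checkpoint path, the pieces combine roughly like this:

{% highlight scala %}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.window

val spark = SparkSession.builder().master("local[2]").appName("watermark-sketch").getOrCreate()
import spark.implicits._

// Report query metrics through whatever Dropwizard sinks are configured.
spark.conf.set("spark.sql.streaming.metricsEnabled", "true")

// The rate source emits columns `timestamp` (Timestamp) and `value` (Long).
val events = spark.readStream.format("rate").option("rowsPerSecond", "10").load()

// The watermark marks a point in event time before which no more late data is expected,
// so state for windows older than roughly 10 minutes can be dropped.
val counts = events
  .withWatermark("timestamp", "10 minutes")
  .groupBy(window($"timestamp", "5 minutes"))
  .count()

val query = counts.writeStream
  .outputMode("update")
  .format("console")
  .option("checkpointLocation", "/tmp/watermark-sketch")  // hypothetical path; enables recovery after restart
  .start()
{% endhighlight %}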
From 6e3396598bb8afb48660c620189b1ed9159fecbc Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Fri, 7 Jul 2017 13:38:33 -0700 Subject: [PATCH 1070/1204] corrected String/Path refactoring of ParquetLocationSelection --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 20 +++-- .../sql/hive/HiveMetastoreCatalogSuite.scala | 87 +++++++++++++++++++ 2 files changed, 98 insertions(+), 9 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 6a63785747ff2..638eb37a8c596 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -274,9 +274,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log logicalRelation }) } else { - val rootPath = metastoreRelation.hiveQlTable.getDataLocation - val paths: Seq[Path] = if (fileType != "parquet") { Seq(rootPath) } else { + val rootPath = Option(metastoreRelation.hiveQlTable.getDataLocation.toString) + val paths: Seq[Path] = if (fileType != "parquet") { Seq(new Path(rootPath.orNull)) } else { selectParquetLocationDirectories(metastoreRelation.tableName, rootPath) + .map{ s => new Path(s) } } withTableCreationLock(tableIdentifier, { val cached = getCached(tableIdentifier, @@ -321,22 +322,23 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log */ private[hive] def selectParquetLocationDirectories( tableName: String, - location: Path): Seq[Path] = { + locationOpt: Option[String]): Seq[String] = { - val inputPaths: Option[Seq[Path]] = for { + val inputPaths: Option[Seq[String]] = for { selector <- sparkSession.sharedState.externalCatalog.findHadoopFileSelector - newLocation = new Path(location.toString) - fs = newLocation.getFileSystem(sparkSession.sparkContext.hadoopConfiguration) - selectedPaths <- selector.selectFiles(tableName, fs, newLocation) + l <- locationOpt + location = new Path(l) + fs = location.getFileSystem(sparkSession.sparkContext.hadoopConfiguration) + selectedPaths <- selector.selectFiles(tableName, fs, location) selectedDir = for { selectedPath <- selectedPaths if selectedPath .getFileSystem(sparkSession.sparkContext.hadoopConfiguration) .isDirectory(selectedPath) - } yield selectedPath + } yield selectedPath.toString if selectedDir.nonEmpty } yield selectedDir - inputPaths.getOrElse(Seq(location)) + inputPaths.getOrElse(Seq(locationOpt.orNull)) } private def inferIfNeeded( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 0a280b495215c..a22244fcc01d6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql.hive +import java.io.File + +import org.apache.hadoop.fs.{FileSystem, Path} + import org.apache.spark.sql.{QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType @@ -184,3 +188,86 @@ class DataSourceWithHiveMetastoreCatalogSuite } } } + +class ParquetLocationSelectionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { + import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.HadoopFileSelector + import 
org.apache.spark.sql.hive.test.TestHive + private val hmc = new HiveMetastoreCatalog(spark) + // ensuring temp directories + private val baseDir = { + val base = + File.createTempFile( + "selectParquetLocationDirectories", + "1", + TestHive.sparkSession.hiveFilesTemp) + base.delete() + base.mkdirs() + base + } + + test(s"With Selector selecting from ${baseDir.toString}") { + val fullpath = { (somewhere: String, sometable: String) => + s"${baseDir.toString}/$somewhere/$sometable" + } + spark.sharedState.externalCatalog.setHadoopFileSelector(new HadoopFileSelector() { + override def selectFiles( + sometable: String, + fs: FileSystem, + somewhere: Path): Option[Seq[Path]] = { + Some(Seq(new Path(fullpath(somewhere.toString, sometable)))) + } + }) + + // ensure directory existence for somewhere/sometable + val somewhereSometable = new File(fullpath("somewhere", "sometable")) + somewhereSometable.mkdirs() + // somewhere/sometable is a directory => will be selected + assertResult(Seq(fullpath("somewhere", "sometable"))) { + hmc.selectParquetLocationDirectories("sometable", Option("somewhere")) + } + + // ensure file existence for somewhere/sometable + somewhereSometable.delete() + somewhereSometable.createNewFile() + // somewhere/sometable is a file => will not be selected + assertResult(Seq("somewhere")) { + hmc.selectParquetLocationDirectories("otherplace", Option("somewhere")) + } + + // no location specified, none selected + assertResult(Seq(null)) { + hmc.selectParquetLocationDirectories("sometable", Option(null)) + } + } + + test("With Selector selecting None") { + spark.sharedState.externalCatalog.setHadoopFileSelector(new HadoopFileSelector() { + override def selectFiles( + tableName: String, + fs: FileSystem, + basePath: Path): Option[Seq[Path]] = None + }) + + // none selected + assertResult(Seq("somewhere")) { + hmc.selectParquetLocationDirectories("sometable", Option("somewhere")) + } + // none selected + assertResult(Seq(null)) { + hmc.selectParquetLocationDirectories("sometable", Option(null)) + } + } + + test("Without Selector") { + spark.sharedState.externalCatalog.unsetHadoopFileSelector() + + // none selected + assertResult(Seq("somewhere")) { + hmc.selectParquetLocationDirectories("sometable", Option("somewhere")) + } + // none selected + assertResult(Seq(null)) { + hmc.selectParquetLocationDirectories("sometable", Option(null)) + } + } +} From ab12848d624f6b74d401e924255c0b4fcc535231 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 7 Jul 2017 23:33:12 -0700 Subject: [PATCH 1071/1204] [SPARK-21069][SS][DOCS] Add rate source to programming guide. ## What changes were proposed in this pull request? SPARK-20979 added a new structured streaming source: Rate source. This patch adds the corresponding documentation to programming guide. ## How was this patch tested? Tested by running jekyll locally. Author: Prashant Sharma Author: Prashant Sharma Closes #18562 from ScrapCodes/spark-21069/rate-source-docs. (cherry picked from commit d0bfc6733521709e453d643582df2bdd68f28de7) Signed-off-by: Shixiong Zhu --- docs/structured-streaming-programming-guide.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 3bc377c9a38b5..8f64faadc32dc 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -499,6 +499,8 @@ There are a few built-in sources. 
- **Socket source (for testing)** - Reads UTF8 text data from a socket connection. The listening server socket is at the driver. Note that this should be used only for testing as this does not provide end-to-end fault-tolerance guarantees. + - **Rate source (for testing)** - Generates data at the specified number of rows per second, each output row contains a `timestamp` and `value`. Where `timestamp` is a `Timestamp` type containing the time of message dispatch, and `value` is of `Long` type containing the message count, starting from 0 as the first row. This source is intended for testing and benchmarking. + Some sources are not fault-tolerant because they do not guarantee that data can be replayed using checkpointed offsets after a failure. See the earlier section on [fault-tolerance semantics](#fault-tolerance-semantics). @@ -546,6 +548,19 @@ Here are the details of all the sources in Spark. No + + Rate Source + + rowsPerSecond (e.g. 100, default: 1): How many rows should be generated per second.

    + rampUpTime (e.g. 5s, default: 0s): How long to ramp up before the generating speed becomes rowsPerSecond. Using finer granularities than seconds will be truncated to integer seconds.

    + numPartitions (e.g. 10, default: Spark's default parallelism): The partition number for the generated rows.

    + + The source will try its best to reach rowsPerSecond, but the query may be resource constrained, and numPartitions can be tweaked to help reach the desired speed. + + Yes + + + Kafka Source From 7d0b1c927d92cc2a4932262514ffd12c47593b80 Mon Sep 17 00:00:00 2001 From: Bogdan Raducanu Date: Sat, 8 Jul 2017 20:14:59 +0800 Subject: [PATCH 1072/1204] [SPARK-21228][SQL][BRANCH-2.2] InSet incorrect handling of structs ## What changes were proposed in this pull request? This is backport of https://github.com/apache/spark/pull/18455 When data type is struct, InSet now uses TypeUtils.getInterpretedOrdering (similar to EqualTo) to build a TreeSet. In other cases it will use a HashSet as before (which should be faster). Similarly, In.eval uses Ordering.equiv instead of equals. ## How was this patch tested? New test in SQLQuerySuite. Author: Bogdan Raducanu Closes #18563 from bogdanrdc/SPARK-21228-BRANCH2.2. --- .../sql/catalyst/expressions/predicates.scala | 57 ++++++++++++------- .../catalyst/expressions/PredicateSuite.scala | 31 +++++----- .../catalyst/optimizer/OptimizeInSuite.scala | 2 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 22 +++++++ 4 files changed, 78 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index c15ee2ab270bc..358d0e7099484 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.catalyst.expressions +import scala.collection.immutable.TreeSet + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} -import org.apache.spark.sql.catalyst.expressions.codegen.{Predicate => BasePredicate} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode, GenerateSafeProjection, GenerateUnsafeProjection, Predicate => BasePredicate} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types._ @@ -164,19 +165,22 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { |[${sub.output.map(_.dataType.catalogString).mkString(", ")}]. 
""".stripMargin) } else { - TypeCheckResult.TypeCheckSuccess + TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName") } case _ => - if (list.exists(l => l.dataType != value.dataType)) { - TypeCheckResult.TypeCheckFailure("Arguments must be same type") + val mismatchOpt = list.find(l => l.dataType != value.dataType) + if (mismatchOpt.isDefined) { + TypeCheckResult.TypeCheckFailure(s"Arguments must be same type but were: " + + s"${value.dataType} != ${mismatchOpt.get.dataType}") } else { - TypeCheckResult.TypeCheckSuccess + TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName") } } } override def children: Seq[Expression] = value +: list lazy val inSetConvertible = list.forall(_.isInstanceOf[Literal]) + private lazy val ordering = TypeUtils.getInterpretedOrdering(value.dataType) override def nullable: Boolean = children.exists(_.nullable) override def foldable: Boolean = children.forall(_.foldable) @@ -191,10 +195,10 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { var hasNull = false list.foreach { e => val v = e.eval(input) - if (v == evaluatedValue) { - return true - } else if (v == null) { + if (v == null) { hasNull = true + } else if (ordering.equiv(v, evaluatedValue)) { + return true } } if (hasNull) { @@ -253,7 +257,7 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with override def nullable: Boolean = child.nullable || hasNull protected override def nullSafeEval(value: Any): Any = { - if (hset.contains(value)) { + if (set.contains(value)) { true } else if (hasNull) { null @@ -262,27 +266,40 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } } - def getHSet(): Set[Any] = hset + @transient private[this] lazy val set = child.dataType match { + case _: AtomicType => hset + case _: NullType => hset + case _ => + // for structs use interpreted ordering to be able to compare UnsafeRows with non-UnsafeRows + TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset + } + + def getSet(): Set[Any] = set override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val setName = classOf[Set[Any]].getName val InSetName = classOf[InSet].getName val childGen = child.genCode(ctx) ctx.references += this - val hsetTerm = ctx.freshName("hset") - val hasNullTerm = ctx.freshName("hasNull") - ctx.addMutableState(setName, hsetTerm, - s"$hsetTerm = (($InSetName)references[${ctx.references.size - 1}]).getHSet();") - ctx.addMutableState("boolean", hasNullTerm, s"$hasNullTerm = $hsetTerm.contains(null);") + val setTerm = ctx.freshName("set") + val setNull = if (hasNull) { + s""" + |if (!${ev.value}) { + | ${ev.isNull} = true; + |} + """.stripMargin + } else { + "" + } + ctx.addMutableState(setName, setTerm, + s"$setTerm = (($InSetName)references[${ctx.references.size - 1}]).getSet();") ev.copy(code = s""" ${childGen.code} boolean ${ev.isNull} = ${childGen.isNull}; boolean ${ev.value} = false; if (!${ev.isNull}) { - ${ev.value} = $hsetTerm.contains(${childGen.value}); - if (!${ev.value} && $hasNullTerm) { - ${ev.isNull} = true; - } + ${ev.value} = $setTerm.contains(${childGen.value}); + $setNull } """) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 6fe295c3dd936..ef510a95ef446 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -35,7 +35,8 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test(s"3VL $name") { truthTable.foreach { case (l, r, answer) => - val expr = op(NonFoldableLiteral(l, BooleanType), NonFoldableLiteral(r, BooleanType)) + val expr = op(NonFoldableLiteral.create(l, BooleanType), + NonFoldableLiteral.create(r, BooleanType)) checkEvaluation(expr, answer) } } @@ -72,7 +73,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (false, true) :: (null, null) :: Nil notTrueTable.foreach { case (v, answer) => - checkEvaluation(Not(NonFoldableLiteral(v, BooleanType)), answer) + checkEvaluation(Not(NonFoldableLiteral.create(v, BooleanType)), answer) } checkConsistencyBetweenInterpretedAndCodegen(Not, BooleanType) } @@ -120,22 +121,26 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, null, null) :: Nil) test("IN") { - checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq(Literal(1), Literal(2))), null) - checkEvaluation(In(NonFoldableLiteral(null, IntegerType), - Seq(NonFoldableLiteral(null, IntegerType))), null) - checkEvaluation(In(NonFoldableLiteral(null, IntegerType), Seq.empty), null) + checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1), + Literal(2))), null) + checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), + Seq(NonFoldableLiteral.create(null, IntegerType))), null) + checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq.empty), null) checkEvaluation(In(Literal(1), Seq.empty), false) - checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral(null, IntegerType))), null) - checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), true) - checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral(null, IntegerType))), null) + checkEvaluation(In(Literal(1), Seq(NonFoldableLiteral.create(null, IntegerType))), null) + checkEvaluation(In(Literal(1), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), + true) + checkEvaluation(In(Literal(2), Seq(Literal(1), NonFoldableLiteral.create(null, IntegerType))), + null) checkEvaluation(In(Literal(1), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(2), Seq(Literal(1), Literal(2))), true) checkEvaluation(In(Literal(3), Seq(Literal(1), Literal(2))), false) checkEvaluation( - And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), Literal(2)))), + And(In(Literal(1), Seq(Literal(1), Literal(2))), In(Literal(2), Seq(Literal(1), + Literal(2)))), true) - val ns = NonFoldableLiteral(null, StringType) + val ns = NonFoldableLiteral.create(null, StringType) checkEvaluation(In(ns, Seq(Literal("1"), Literal("2"))), null) checkEvaluation(In(ns, Seq(ns)), null) checkEvaluation(In(Literal("a"), Seq(ns)), null) @@ -155,7 +160,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { case _ => value } } - val input = inputData.map(NonFoldableLiteral(_, t)) + val input = inputData.map(NonFoldableLiteral.create(_, t)) val expected = if (inputData(0) == null) { null } else if (inputData.slice(1, 10).contains(inputData(0))) { @@ -279,7 +284,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test("BinaryComparison: null test") { // Use -1 (default value for codegen) which can trigger some weird bugs, e.g. 
SPARK-14757 val normalInt = Literal(-1) - val nullInt = NonFoldableLiteral(null, IntegerType) + val nullInt = NonFoldableLiteral.create(null, IntegerType) def nullTest(op: (Expression, Expression) => Expression): Unit = { checkEvaluation(op(normalInt, nullInt), null) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index d8937321ecb98..f12f0f5eb4cd4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -166,7 +166,7 @@ class OptimizeInSuite extends PlanTest { val optimizedPlan = OptimizeIn(conf.copy(OPTIMIZER_INSET_CONVERSION_THRESHOLD -> 2))(plan) optimizedPlan match { case Filter(cond, _) - if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getHSet().size == 3 => + if cond.isInstanceOf[InSet] && cond.asInstanceOf[InSet].getSet().size == 3 => // pass case _ => fail("Unexpected result for OptimizedIn") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index cd14d24370bad..c6a6efda59879 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2624,4 +2624,26 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException](sql("SELECT nvl(1, 2, 3)")) assert(e.message.contains("Invalid number of arguments")) } + + test("SPARK-21228: InSet incorrect handling of structs") { + withTempView("A") { + // reduce this from the default of 10 so the repro query text is not too long + withSQLConf((SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "3")) { + // a relation that has 1 column of struct type with values (1,1), ..., (9, 9) + spark.range(1, 10).selectExpr("named_struct('a', id, 'b', id) as a") + .createOrReplaceTempView("A") + val df = sql( + """ + |SELECT * from + | (SELECT MIN(a) as minA FROM A) AA -- this Aggregate will return UnsafeRows + | -- the IN will become InSet with a Set of GenericInternalRows + | -- a GenericInternalRow is never equal to an UnsafeRow so the query would + | -- returns 0 results, which is incorrect + | WHERE minA IN (NAMED_STRUCT('a', 1L, 'b', 1L), NAMED_STRUCT('a', 2L, 'b', 2L), + | NAMED_STRUCT('a', 3L, 'b', 3L)) + """.stripMargin) + checkAnswer(df, Row(Row(1, 1))) + } + } + } } From a64f10800244a8057f7f32c3d2f4a719c5080d05 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 8 Jul 2017 20:16:47 +0800 Subject: [PATCH 1073/1204] [SPARK-21345][SQL][TEST][TEST-MAVEN] SparkSessionBuilderSuite should clean up stopped sessions. `SparkSessionBuilderSuite` should clean up stopped sessions. Otherwise, it leaves behind some stopped `SparkContext`s interfereing with other test suites using `ShardSQLContext`. Recently, master branch fails consequtively. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/ Pass the Jenkins with a updated suite. Author: Dongjoon Hyun Closes #18567 from dongjoon-hyun/SPARK-SESSION. 
(cherry picked from commit 0b8dd2d08460f3e6eb578727d2c336b6f11959e7) Signed-off-by: Wenchen Fan --- .../spark/sql/SparkSessionBuilderSuite.scala | 44 ++++++++----------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 386d13d07a95f..1c6afa5e26e14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -17,49 +17,48 @@ package org.apache.spark.sql +import org.scalatest.BeforeAndAfterEach + import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} /** * Test cases for the builder pattern of [[SparkSession]]. */ -class SparkSessionBuilderSuite extends SparkFunSuite { +class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { - private var initialSession: SparkSession = _ + override def afterEach(): Unit = { + // This suite should not interfere with the other test suites. + SparkSession.getActiveSession.foreach(_.stop()) + SparkSession.clearActiveSession() + SparkSession.getDefaultSession.foreach(_.stop()) + SparkSession.clearDefaultSession() + } - private lazy val sparkContext: SparkContext = { - initialSession = SparkSession.builder() + test("create with config options and propagate them to SparkContext and SparkSession") { + val session = SparkSession.builder() .master("local") .config("spark.ui.enabled", value = false) .config("some-config", "v2") .getOrCreate() - initialSession.sparkContext - } - - test("create with config options and propagate them to SparkContext and SparkSession") { - // Creating a new session with config - this works by just calling the lazy val - sparkContext - assert(initialSession.sparkContext.conf.get("some-config") == "v2") - assert(initialSession.conf.get("some-config") == "v2") - SparkSession.clearDefaultSession() + assert(session.sparkContext.conf.get("some-config") == "v2") + assert(session.conf.get("some-config") == "v2") } test("use global default session") { - val session = SparkSession.builder().getOrCreate() + val session = SparkSession.builder().master("local").getOrCreate() assert(SparkSession.builder().getOrCreate() == session) - SparkSession.clearDefaultSession() } test("config options are propagated to existing SparkSession") { - val session1 = SparkSession.builder().config("spark-config1", "a").getOrCreate() + val session1 = SparkSession.builder().master("local").config("spark-config1", "a").getOrCreate() assert(session1.conf.get("spark-config1") == "a") val session2 = SparkSession.builder().config("spark-config1", "b").getOrCreate() assert(session1 == session2) assert(session1.conf.get("spark-config1") == "b") - SparkSession.clearDefaultSession() } test("use session from active thread session and propagate config options") { - val defaultSession = SparkSession.builder().getOrCreate() + val defaultSession = SparkSession.builder().master("local").getOrCreate() val activeSession = defaultSession.newSession() SparkSession.setActiveSession(activeSession) val session = SparkSession.builder().config("spark-config2", "a").getOrCreate() @@ -70,16 +69,14 @@ class SparkSessionBuilderSuite extends SparkFunSuite { SparkSession.clearActiveSession() assert(SparkSession.builder().getOrCreate() == defaultSession) - SparkSession.clearDefaultSession() } test("create a new session if the default session has been stopped") { - val defaultSession = 
SparkSession.builder().getOrCreate() + val defaultSession = SparkSession.builder().master("local").getOrCreate() SparkSession.setDefaultSession(defaultSession) defaultSession.stop() val newSession = SparkSession.builder().master("local").getOrCreate() assert(newSession != defaultSession) - newSession.stop() } test("create a new session if the active thread session has been stopped") { @@ -88,11 +85,9 @@ class SparkSessionBuilderSuite extends SparkFunSuite { activeSession.stop() val newSession = SparkSession.builder().master("local").getOrCreate() assert(newSession != activeSession) - newSession.stop() } test("create SparkContext first then SparkSession") { - sparkContext.stop() val conf = new SparkConf().setAppName("test").setMaster("local").set("key1", "value1") val sparkContext2 = new SparkContext(conf) val session = SparkSession.builder().config("key2", "value2").getOrCreate() @@ -101,14 +96,12 @@ class SparkSessionBuilderSuite extends SparkFunSuite { assert(session.sparkContext.conf.get("key1") == "value1") assert(session.sparkContext.conf.get("key2") == "value2") assert(session.sparkContext.conf.get("spark.app.name") == "test") - session.stop() } test("SPARK-15887: hive-site.xml should be loaded") { val session = SparkSession.builder().master("local").getOrCreate() assert(session.sessionState.newHadoopConf().get("hive.in.test") == "true") assert(session.sparkContext.hadoopConfiguration.get("hive.in.test") == "true") - session.stop() } test("SPARK-15991: Set global Hadoop conf") { @@ -120,7 +113,6 @@ class SparkSessionBuilderSuite extends SparkFunSuite { assert(session.sessionState.newHadoopConf().get(mySpecialKey) == mySpecialValue) } finally { session.sparkContext.hadoopConfiguration.unset(mySpecialKey) - session.stop() } } } From c8d7855b905742033b7588ce7ee28bc23de13709 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Sun, 9 Jul 2017 00:24:54 +0800 Subject: [PATCH 1074/1204] [SPARK-20342][CORE] Update task accumulators before sending task end event. This makes sures that listeners get updated task information; otherwise it's possible to write incomplete task information into event logs, for example, making the information in a replayed UI inconsistent with the original application. Added a new unit test to try to detect the problem, but it's not guaranteed to fail since it's a race; but it fails pretty reliably for me without the scheduler changes. Author: Marcelo Vanzin Closes #18393 from vanzin/SPARK-20342.try2. 
(cherry picked from commit 9131bdb7e12bcfb2cb699b3438f554604e28aaa8) Signed-off-by: Wenchen Fan --- .../apache/spark/scheduler/DAGScheduler.scala | 70 ++++++++++++------- .../spark/scheduler/DAGSchedulerSuite.scala | 32 ++++++++- 2 files changed, 75 insertions(+), 27 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 35f6b365eca85..b00942463ecfa 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1111,6 +1111,25 @@ class DAGScheduler( } } + private def postTaskEnd(event: CompletionEvent): Unit = { + val taskMetrics: TaskMetrics = + if (event.accumUpdates.nonEmpty) { + try { + TaskMetrics.fromAccumulators(event.accumUpdates) + } catch { + case NonFatal(e) => + val taskId = event.taskInfo.taskId + logError(s"Error when attempting to reconstruct metrics for task $taskId", e) + null + } + } else { + null + } + + listenerBus.post(SparkListenerTaskEnd(event.task.stageId, event.task.stageAttemptId, + Utils.getFormattedClassName(event.task), event.reason, event.taskInfo, taskMetrics)) + } + /** * Responds to a task finishing. This is called inside the event loop so it assumes that it can * modify the scheduler's internal state. Use taskEnded() to post a task end event from outside. @@ -1127,34 +1146,36 @@ class DAGScheduler( event.taskInfo.attemptNumber, // this is a task attempt number event.reason) - // Reconstruct task metrics. Note: this may be null if the task has failed. - val taskMetrics: TaskMetrics = - if (event.accumUpdates.nonEmpty) { - try { - TaskMetrics.fromAccumulators(event.accumUpdates) - } catch { - case NonFatal(e) => - logError(s"Error when attempting to reconstruct metrics for task $taskId", e) - null - } - } else { - null - } - - // The stage may have already finished when we get this event -- eg. maybe it was a - // speculative task. It is important that we send the TaskEnd event in any case, so listeners - // are properly notified and can chose to handle it. For instance, some listeners are - // doing their own accounting and if they don't get the task end event they think - // tasks are still running when they really aren't. - listenerBus.post(SparkListenerTaskEnd( - stageId, task.stageAttemptId, taskType, event.reason, event.taskInfo, taskMetrics)) - if (!stageIdToStage.contains(task.stageId)) { + // The stage may have already finished when we get this event -- eg. maybe it was a + // speculative task. It is important that we send the TaskEnd event in any case, so listeners + // are properly notified and can chose to handle it. For instance, some listeners are + // doing their own accounting and if they don't get the task end event they think + // tasks are still running when they really aren't. + postTaskEnd(event) + // Skip all the actions if the stage has been cancelled. return } val stage = stageIdToStage(task.stageId) + + // Make sure the task's accumulators are updated before any other processing happens, so that + // we can post a task end event before any jobs or stages are updated. The accumulators are + // only updated in certain cases. + event.reason match { + case Success => + stage match { + case rs: ResultStage if rs.activeJob.isEmpty => + // Ignore update if task's job has finished. 
+ case _ => + updateAccumulators(event) + } + case _: ExceptionFailure => updateAccumulators(event) + case _ => + } + postTaskEnd(event) + event.reason match { case Success => task match { @@ -1165,7 +1186,6 @@ class DAGScheduler( resultStage.activeJob match { case Some(job) => if (!job.finished(rt.outputId)) { - updateAccumulators(event) job.finished(rt.outputId) = true job.numFinished += 1 // If the whole job has finished, remove it @@ -1192,7 +1212,6 @@ class DAGScheduler( case smt: ShuffleMapTask => val shuffleStage = stage.asInstanceOf[ShuffleMapStage] - updateAccumulators(event) val status = event.result.asInstanceOf[MapStatus] val execId = status.location.executorId logDebug("ShuffleMapTask finished on " + execId) @@ -1351,8 +1370,7 @@ class DAGScheduler( // Do nothing here, left up to the TaskScheduler to decide how to handle denied commits case exceptionFailure: ExceptionFailure => - // Tasks failed with exceptions might still have accumulator updates. - updateAccumulators(event) + // Nothing left to do, already handled above for accumulator updates. case TaskResultLost => // Do nothing here; the TaskScheduler handles these failures and resubmits the task. diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index a10941b579fe2..9112065c9f1fa 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler import java.util.Properties -import java.util.concurrent.atomic.AtomicBoolean +import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} import scala.annotation.meta.param import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} @@ -2277,6 +2277,36 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou (Success, 1))) } + test("task end event should have updated accumulators (SPARK-20342)") { + val tasks = 10 + + val accumId = new AtomicLong() + val foundCount = new AtomicLong() + val listener = new SparkListener() { + override def onTaskEnd(event: SparkListenerTaskEnd): Unit = { + event.taskInfo.accumulables.find(_.id == accumId.get).foreach { _ => + foundCount.incrementAndGet() + } + } + } + sc.addSparkListener(listener) + + // Try a few times in a loop to make sure. This is not guaranteed to fail when the bug exists, + // but it should at least make the test flaky. If the bug is fixed, this should always pass. + (1 to 10).foreach { i => + foundCount.set(0L) + + val accum = sc.longAccumulator(s"accum$i") + accumId.set(accum.id) + + sc.parallelize(1 to tasks, tasks).foreach { _ => + accum.add(1L) + } + sc.listenerBus.waitUntilEmpty(1000) + assert(foundCount.get() === tasks) + } + } + /** * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations. * Note that this checks only the host and not the executor ID. From 964332b2879af048a95606dfcb4f2cb2e356135b Mon Sep 17 00:00:00 2001 From: jinxing Date: Sun, 9 Jul 2017 00:27:58 +0800 Subject: [PATCH 1075/1204] [SPARK-21343] Refine the document for spark.reducer.maxReqSizeShuffleToMem. ## What changes were proposed in this pull request? In current code, reducer can break the old shuffle service when `spark.reducer.maxReqSizeShuffleToMem` is enabled. Let's refine document. Author: jinxing Closes #18566 from jinxing64/SPARK-21343. 
(cherry picked from commit 062c336d06a0bd4e740a18d2349e03e311509243) Signed-off-by: Wenchen Fan --- .../org/apache/spark/internal/config/package.scala | 6 ++++-- docs/configuration.md | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 70feea8af8c56..17746734df082 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -289,9 +289,11 @@ package object config { private[spark] val REDUCER_MAX_REQ_SIZE_SHUFFLE_TO_MEM = ConfigBuilder("spark.reducer.maxReqSizeShuffleToMem") - .internal() .doc("The blocks of a shuffle request will be fetched to disk when size of the request is " + - "above this threshold. This is to avoid a giant request takes too much memory.") + "above this threshold. This is to avoid a giant request takes too much memory. We can " + + "enable this config by setting a specific value(e.g. 200m). Note that this config can " + + "be enabled only when the shuffle shuffle service is newer than Spark-2.2 or the shuffle" + + " service is disabled.") .bytesConf(ByteUnit.BYTE) .createWithDefault(Long.MaxValue) } diff --git a/docs/configuration.md b/docs/configuration.md index 459e944e95e25..f47967cab669b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -519,6 +519,16 @@ Apart from these, the following properties are also available, and may be useful By allowing it to limit the number of fetch requests, this scenario can be mitigated. + + spark.reducer.maxReqSizeShuffleToMem + Long.MaxValue + + The blocks of a shuffle request will be fetched to disk when size of the request is above + this threshold. This is to avoid a giant request takes too much memory. We can enable this + config by setting a specific value(e.g. 200m). Note that this config can be enabled only when + the shuffle shuffle service is newer than Spark-2.2 or the shuffle service is disabled. + + spark.shuffle.compress true From 5e2bfd5bca14d604270e8bd3018f5771b83ea07f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 9 Jul 2017 11:11:02 +0800 Subject: [PATCH 1076/1204] [SPARK-21345][SQL][TEST][TEST-MAVEN][BRANCH-2.1] SparkSessionBuilderSuite should clean up stopped sessions. ## What changes were proposed in this pull request? `SparkSessionBuilderSuite` should clean up stopped sessions. Otherwise, it leaves behind some stopped `SparkContext`s interfereing with other test suites using `ShardSQLContext`. Recently, master branch fails consequtively. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/ ## How was this patch tested? Pass the Jenkins with a updated suite. Author: Dongjoon Hyun Closes #18572 from dongjoon-hyun/SPARK-21345-BRANCH-2.1. 
--- .../spark/sql/SparkSessionBuilderSuite.scala | 44 ++++++++----------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 386d13d07a95f..1c6afa5e26e14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -17,49 +17,48 @@ package org.apache.spark.sql +import org.scalatest.BeforeAndAfterEach + import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} /** * Test cases for the builder pattern of [[SparkSession]]. */ -class SparkSessionBuilderSuite extends SparkFunSuite { +class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { - private var initialSession: SparkSession = _ + override def afterEach(): Unit = { + // This suite should not interfere with the other test suites. + SparkSession.getActiveSession.foreach(_.stop()) + SparkSession.clearActiveSession() + SparkSession.getDefaultSession.foreach(_.stop()) + SparkSession.clearDefaultSession() + } - private lazy val sparkContext: SparkContext = { - initialSession = SparkSession.builder() + test("create with config options and propagate them to SparkContext and SparkSession") { + val session = SparkSession.builder() .master("local") .config("spark.ui.enabled", value = false) .config("some-config", "v2") .getOrCreate() - initialSession.sparkContext - } - - test("create with config options and propagate them to SparkContext and SparkSession") { - // Creating a new session with config - this works by just calling the lazy val - sparkContext - assert(initialSession.sparkContext.conf.get("some-config") == "v2") - assert(initialSession.conf.get("some-config") == "v2") - SparkSession.clearDefaultSession() + assert(session.sparkContext.conf.get("some-config") == "v2") + assert(session.conf.get("some-config") == "v2") } test("use global default session") { - val session = SparkSession.builder().getOrCreate() + val session = SparkSession.builder().master("local").getOrCreate() assert(SparkSession.builder().getOrCreate() == session) - SparkSession.clearDefaultSession() } test("config options are propagated to existing SparkSession") { - val session1 = SparkSession.builder().config("spark-config1", "a").getOrCreate() + val session1 = SparkSession.builder().master("local").config("spark-config1", "a").getOrCreate() assert(session1.conf.get("spark-config1") == "a") val session2 = SparkSession.builder().config("spark-config1", "b").getOrCreate() assert(session1 == session2) assert(session1.conf.get("spark-config1") == "b") - SparkSession.clearDefaultSession() } test("use session from active thread session and propagate config options") { - val defaultSession = SparkSession.builder().getOrCreate() + val defaultSession = SparkSession.builder().master("local").getOrCreate() val activeSession = defaultSession.newSession() SparkSession.setActiveSession(activeSession) val session = SparkSession.builder().config("spark-config2", "a").getOrCreate() @@ -70,16 +69,14 @@ class SparkSessionBuilderSuite extends SparkFunSuite { SparkSession.clearActiveSession() assert(SparkSession.builder().getOrCreate() == defaultSession) - SparkSession.clearDefaultSession() } test("create a new session if the default session has been stopped") { - val defaultSession = SparkSession.builder().getOrCreate() + val defaultSession = 
SparkSession.builder().master("local").getOrCreate() SparkSession.setDefaultSession(defaultSession) defaultSession.stop() val newSession = SparkSession.builder().master("local").getOrCreate() assert(newSession != defaultSession) - newSession.stop() } test("create a new session if the active thread session has been stopped") { @@ -88,11 +85,9 @@ class SparkSessionBuilderSuite extends SparkFunSuite { activeSession.stop() val newSession = SparkSession.builder().master("local").getOrCreate() assert(newSession != activeSession) - newSession.stop() } test("create SparkContext first then SparkSession") { - sparkContext.stop() val conf = new SparkConf().setAppName("test").setMaster("local").set("key1", "value1") val sparkContext2 = new SparkContext(conf) val session = SparkSession.builder().config("key2", "value2").getOrCreate() @@ -101,14 +96,12 @@ class SparkSessionBuilderSuite extends SparkFunSuite { assert(session.sparkContext.conf.get("key1") == "value1") assert(session.sparkContext.conf.get("key2") == "value2") assert(session.sparkContext.conf.get("spark.app.name") == "test") - session.stop() } test("SPARK-15887: hive-site.xml should be loaded") { val session = SparkSession.builder().master("local").getOrCreate() assert(session.sessionState.newHadoopConf().get("hive.in.test") == "true") assert(session.sparkContext.hadoopConfiguration.get("hive.in.test") == "true") - session.stop() } test("SPARK-15991: Set global Hadoop conf") { @@ -120,7 +113,6 @@ class SparkSessionBuilderSuite extends SparkFunSuite { assert(session.sessionState.newHadoopConf().get(mySpecialKey) == mySpecialValue) } finally { session.sparkContext.hadoopConfiguration.unset(mySpecialKey) - session.stop() } } } From 3bfad9d4210f96dcd2270599257c3a5272cad77b Mon Sep 17 00:00:00 2001 From: Zhenhua Wang Date: Sun, 9 Jul 2017 18:51:06 +0800 Subject: [PATCH 1077/1204] [SPARK-21083][SQL][BRANCH-2.2] Store zero size and row count when analyzing empty table ## What changes were proposed in this pull request? We should be able to store zero size and row count after analyzing empty table. This is a backport for https://github.com/apache/spark/commit/9fccc3627fa41d32fbae6dbbb9bd1521e43eb4f0. ## How was this patch tested? Added new test. Author: Zhenhua Wang Closes #18575 from wzhfy/analyzeEmptyTable-2.2. 
--- .../command/AnalyzeTableCommand.scala | 4 +- .../spark/sql/StatisticsCollectionSuite.scala | 45 +++++++++++++------ .../spark/sql/hive/StatisticsSuite.scala | 19 +------- 3 files changed, 35 insertions(+), 33 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala index 0f3c69c930acb..bf7c22761dc04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala @@ -47,10 +47,10 @@ case class AnalyzeTableCommand( } val newTotalSize = AnalyzeTableCommand.calculateTotalSize(sessionState, tableMeta) - val oldTotalSize = tableMeta.stats.map(_.sizeInBytes.toLong).getOrElse(0L) + val oldTotalSize = tableMeta.stats.map(_.sizeInBytes.toLong).getOrElse(-1L) val oldRowCount = tableMeta.stats.flatMap(_.rowCount.map(_.toLong)).getOrElse(-1L) var newStats: Option[CatalogStatistics] = None - if (newTotalSize > 0 && newTotalSize != oldTotalSize) { + if (newTotalSize >= 0 && newTotalSize != oldTotalSize) { newStats = Some(CatalogStatistics(sizeInBytes = newTotalSize)) } // We only set rowCount when noscan is false, because otherwise: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 601324f2c0172..ae0f219b281ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -40,17 +40,6 @@ import org.apache.spark.sql.types._ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with SharedSQLContext { import testImplicits._ - private def checkTableStats(tableName: String, expectedRowCount: Option[Int]) - : Option[CatalogStatistics] = { - val df = spark.table(tableName) - val stats = df.queryExecution.analyzed.collect { case rel: LogicalRelation => - assert(rel.catalogTable.get.stats.flatMap(_.rowCount) === expectedRowCount) - rel.catalogTable.get.stats - } - assert(stats.size == 1) - stats.head - } - test("estimates the size of a limit 0 on outer join") { withTempView("test") { Seq(("one", 1), ("two", 2), ("three", 3), ("four", 4)).toDF("k", "v") @@ -88,6 +77,19 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } + test("analyze empty table") { + val table = "emptyTable" + withTable(table) { + sql(s"CREATE TABLE $table (key STRING, value STRING) USING PARQUET") + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS noscan") + val fetchedStats1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetchedStats1.get.sizeInBytes == 0) + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS") + val fetchedStats2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) + assert(fetchedStats2.get.sizeInBytes == 0) + } + } + test("test table-level statistics for data source table") { val tableName = "tbl" withTable(tableName) { @@ -96,11 +98,11 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared // noscan won't count the number of rows sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - checkTableStats(tableName, expectedRowCount = None) + checkTableStats(tableName, hasSizeInBytes = true, expectedRowCounts = None) // without noscan, we count the number of rows sql(s"ANALYZE 
TABLE $tableName COMPUTE STATISTICS") - checkTableStats(tableName, expectedRowCount = Some(2)) + checkTableStats(tableName, hasSizeInBytes = true, expectedRowCounts = Some(2)) } } @@ -219,6 +221,23 @@ abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils private val randomName = new Random(31) + def checkTableStats( + tableName: String, + hasSizeInBytes: Boolean, + expectedRowCounts: Option[Int]): Option[CatalogStatistics] = { + val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats + + if (hasSizeInBytes || expectedRowCounts.nonEmpty) { + assert(stats.isDefined) + assert(stats.get.sizeInBytes >= 0) + assert(stats.get.rowCount === expectedRowCounts) + } else { + assert(stats.isEmpty) + } + + stats + } + /** * Compute column stats for the given DataFrame and compare it with colStats. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index b03d69e8254cd..819180d8b972e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils + class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleton { @@ -217,23 +217,6 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } - private def checkTableStats( - tableName: String, - hasSizeInBytes: Boolean, - expectedRowCounts: Option[Int]): Option[CatalogStatistics] = { - val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats - - if (hasSizeInBytes || expectedRowCounts.nonEmpty) { - assert(stats.isDefined) - assert(stats.get.sizeInBytes > 0) - assert(stats.get.rowCount === expectedRowCounts) - } else { - assert(stats.isEmpty) - } - - stats - } - test("test table-level statistics for hive tables created in HiveExternalCatalog") { val textTable = "textTable" withTable(textTable) { From 2c28462411f21f71c0e048cb1f7e05efe19da6b7 Mon Sep 17 00:00:00 2001 From: Zhenhua Wang Date: Mon, 10 Jul 2017 11:31:55 +0800 Subject: [PATCH 1078/1204] [SPARK-21083][SQL][BRANCH-2.1] Store zero size and row count when analyzing empty table ## What changes were proposed in this pull request? We should be able to store zero size and row count after analyzing empty table. This is a backport for https://github.com/apache/spark/commit/9fccc3627fa41d32fbae6dbbb9bd1521e43eb4f0. ## How was this patch tested? Added new test. Author: Zhenhua Wang Closes #18577 from wzhfy/analyzeEmptyTable-2.1. 
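To make the reasoning behind the one-line change concrete, here is a simplified, self-contained sketch of the update guard in `AnalyzeTableCommand` (not the exact production code):

```scala
// First-time analysis of an empty table: no existing stats, newTotalSize == 0.
val existingSizeInBytes: Option[Long] = None
val newTotalSize = 0L

// Before: missing stats defaulted to 0 and the guard required a strictly positive size,
// so an empty table never had its statistics stored.
val storedBefore = {
  val oldTotalSize = existingSizeInBytes.getOrElse(0L)
  newTotalSize > 0 && newTotalSize != oldTotalSize      // false
}

// After: missing stats default to -1 ("unknown") and zero is accepted, so the empty
// table ends up with sizeInBytes = 0 (and rowCount = 0 when noscan is not used).
val storedAfter = {
  val oldTotalSize = existingSizeInBytes.getOrElse(-1L)
  newTotalSize >= 0 && newTotalSize != oldTotalSize     // true
}

assert(!storedBefore && storedAfter)
```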
--- .../command/AnalyzeTableCommand.scala | 4 +- .../spark/sql/StatisticsCollectionSuite.scala | 39 +++++++++++++------ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala index 52a8fc88c56cd..e6606b403f700 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTableCommand.scala @@ -60,10 +60,10 @@ case class AnalyzeTableCommand( } def updateTableStats(catalogTable: CatalogTable, newTotalSize: Long): Unit = { - val oldTotalSize = catalogTable.stats.map(_.sizeInBytes.toLong).getOrElse(0L) + val oldTotalSize = catalogTable.stats.map(_.sizeInBytes.toLong).getOrElse(-1L) val oldRowCount = catalogTable.stats.flatMap(_.rowCount.map(_.toLong)).getOrElse(-1L) var newStats: Option[Statistics] = None - if (newTotalSize > 0 && newTotalSize != oldTotalSize) { + if (newTotalSize >= 0 && newTotalSize != oldTotalSize) { newStats = Some(Statistics(sizeInBytes = newTotalSize)) } // We only set rowCount when noscan is false, because otherwise: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index c663b31351b52..a08edbe7d9a5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -25,7 +25,6 @@ import scala.util.Random import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} import org.apache.spark.sql.test.SQLTestData.ArrayData @@ -38,15 +37,20 @@ import org.apache.spark.sql.types._ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with SharedSQLContext { import testImplicits._ - private def checkTableStats(tableName: String, expectedRowCount: Option[Int]) - : Option[Statistics] = { - val df = spark.table(tableName) - val stats = df.queryExecution.analyzed.collect { case rel: LogicalRelation => - assert(rel.catalogTable.get.stats.flatMap(_.rowCount) === expectedRowCount) - rel.catalogTable.get.stats + def checkTableStats( + tableName: String, + hasSizeInBytes: Boolean, + expectedRowCounts: Option[Int]): Option[Statistics] = { + val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).stats + if (hasSizeInBytes || expectedRowCounts.nonEmpty) { + assert(stats.isDefined) + assert(stats.get.sizeInBytes >= 0) + assert(stats.get.rowCount === expectedRowCounts) + } else { + assert(stats.isEmpty) } - assert(stats.size == 1) - stats.head + + stats } test("estimates the size of a limit 0 on outer join") { @@ -86,6 +90,19 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } + test("analyze empty table") { + val table = "emptyTable" + withTable(table) { + sql(s"CREATE TABLE $table (key STRING, value STRING) USING PARQUET") + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS noscan") + val fetchedStats1 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) + assert(fetchedStats1.get.sizeInBytes == 0) + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS") 
+ val fetchedStats2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) + assert(fetchedStats2.get.sizeInBytes == 0) + } + } + test("test table-level statistics for data source table") { val tableName = "tbl" withTable(tableName) { @@ -94,11 +111,11 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared // noscan won't count the number of rows sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - checkTableStats(tableName, expectedRowCount = None) + checkTableStats(tableName, hasSizeInBytes = true, expectedRowCounts = None) // without noscan, we count the number of rows sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS") - checkTableStats(tableName, expectedRowCount = Some(2)) + checkTableStats(tableName, hasSizeInBytes = true, expectedRowCounts = Some(2)) } } From 40fd0ce7f2c2facb96fc5d613bc7b6e4b573d9f7 Mon Sep 17 00:00:00 2001 From: jinxing Date: Mon, 10 Jul 2017 21:06:58 +0800 Subject: [PATCH 1079/1204] [SPARK-21342] Fix DownloadCallback to work well with RetryingBlockFetcher. When `RetryingBlockFetcher` retries fetching blocks. There could be two `DownloadCallback`s download the same content to the same target file. It could cause `ShuffleBlockFetcherIterator` reading a partial result. This pr proposes to create and delete the tmp files in `OneForOneBlockFetcher` Author: jinxing Author: Shixiong Zhu Closes #18565 from jinxing64/SPARK-21342. (cherry picked from commit 6a06c4b03c4dd86241fb9d11b4360371488f0e53) Signed-off-by: Wenchen Fan --- .../shuffle/ExternalShuffleClient.java | 7 ++-- .../shuffle/OneForOneBlockFetcher.java | 34 +++++++++++------- .../spark/network/shuffle/ShuffleClient.java | 13 +++++-- .../shuffle/TempShuffleFileManager.java | 36 +++++++++++++++++++ .../network/sasl/SaslIntegrationSuite.java | 2 +- .../shuffle/OneForOneBlockFetcherSuite.java | 2 +- .../spark/network/BlockTransferService.scala | 8 ++--- .../netty/NettyBlockTransferService.scala | 9 +++-- .../storage/ShuffleBlockFetcherIterator.scala | 28 ++++++++++----- .../spark/storage/BlockManagerSuite.scala | 5 ++- .../ShuffleBlockFetcherIteratorSuite.scala | 10 +++--- 11 files changed, 108 insertions(+), 46 deletions(-) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempShuffleFileManager.java diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java index 269fa72dad5f5..39af9d5fe34d7 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java @@ -17,7 +17,6 @@ package org.apache.spark.network.shuffle; -import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.util.List; @@ -88,15 +87,15 @@ public void fetchBlocks( String execId, String[] blockIds, BlockFetchingListener listener, - File[] shuffleFiles) { + TempShuffleFileManager tempShuffleFileManager) { checkInit(); logger.debug("External shuffle fetch from {}:{} (executor id {})", host, port, execId); try { RetryingBlockFetcher.BlockFetchStarter blockFetchStarter = (blockIds1, listener1) -> { TransportClient client = clientFactory.createClient(host, port); - new OneForOneBlockFetcher(client, appId, execId, blockIds1, listener1, conf, - shuffleFiles).start(); + new OneForOneBlockFetcher(client, appId, execId, + blockIds1, 
listener1, conf, tempShuffleFileManager).start(); }; int maxRetries = conf.maxIORetries(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java index d46ce2e0e6b78..2f160d12af22b 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java @@ -57,11 +57,21 @@ public class OneForOneBlockFetcher { private final String[] blockIds; private final BlockFetchingListener listener; private final ChunkReceivedCallback chunkCallback; - private TransportConf transportConf = null; - private File[] shuffleFiles = null; + private final TransportConf transportConf; + private final TempShuffleFileManager tempShuffleFileManager; private StreamHandle streamHandle = null; + public OneForOneBlockFetcher( + TransportClient client, + String appId, + String execId, + String[] blockIds, + BlockFetchingListener listener, + TransportConf transportConf) { + this(client, appId, execId, blockIds, listener, transportConf, null); + } + public OneForOneBlockFetcher( TransportClient client, String appId, @@ -69,18 +79,14 @@ public OneForOneBlockFetcher( String[] blockIds, BlockFetchingListener listener, TransportConf transportConf, - File[] shuffleFiles) { + TempShuffleFileManager tempShuffleFileManager) { this.client = client; this.openMessage = new OpenBlocks(appId, execId, blockIds); this.blockIds = blockIds; this.listener = listener; this.chunkCallback = new ChunkCallback(); this.transportConf = transportConf; - if (shuffleFiles != null) { - this.shuffleFiles = shuffleFiles; - assert this.shuffleFiles.length == blockIds.length: - "Number of shuffle files should equal to blocks"; - } + this.tempShuffleFileManager = tempShuffleFileManager; } /** Callback invoked on receipt of each chunk. We equate a single chunk to a single block. */ @@ -119,9 +125,9 @@ public void onSuccess(ByteBuffer response) { // Immediately request all chunks -- we expect that the total size of the request is // reasonable due to higher level chunking in [[ShuffleBlockFetcherIterator]]. 
for (int i = 0; i < streamHandle.numChunks; i++) { - if (shuffleFiles != null) { + if (tempShuffleFileManager != null) { client.stream(OneForOneStreamManager.genStreamChunkId(streamHandle.streamId, i), - new DownloadCallback(shuffleFiles[i], i)); + new DownloadCallback(i)); } else { client.fetchChunk(streamHandle.streamId, i, chunkCallback); } @@ -157,8 +163,8 @@ private class DownloadCallback implements StreamCallback { private File targetFile = null; private int chunkIndex; - DownloadCallback(File targetFile, int chunkIndex) throws IOException { - this.targetFile = targetFile; + DownloadCallback(int chunkIndex) throws IOException { + this.targetFile = tempShuffleFileManager.createTempShuffleFile(); this.channel = Channels.newChannel(new FileOutputStream(targetFile)); this.chunkIndex = chunkIndex; } @@ -174,6 +180,9 @@ public void onComplete(String streamId) throws IOException { ManagedBuffer buffer = new FileSegmentManagedBuffer(transportConf, targetFile, 0, targetFile.length()); listener.onBlockFetchSuccess(blockIds[chunkIndex], buffer); + if (!tempShuffleFileManager.registerTempShuffleFileToClean(targetFile)) { + targetFile.delete(); + } } @Override @@ -182,6 +191,7 @@ public void onFailure(String streamId, Throwable cause) throws IOException { // On receipt of a failure, fail every block from chunkIndex onwards. String[] remainingBlockIds = Arrays.copyOfRange(blockIds, chunkIndex, blockIds.length); failRemainingBlocks(remainingBlockIds, cause); + targetFile.delete(); } } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java index 978ff5a2a8699..9e77bee7f9ee6 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java @@ -18,7 +18,6 @@ package org.apache.spark.network.shuffle; import java.io.Closeable; -import java.io.File; /** Provides an interface for reading shuffle files, either from an Executor or external service. */ public abstract class ShuffleClient implements Closeable { @@ -35,6 +34,16 @@ public void init(String appId) { } * Note that this API takes a sequence so the implementation can batch requests, and does not * return a future so the underlying implementation can invoke onBlockFetchSuccess as soon as * the data of a block is fetched, rather than waiting for all blocks to be fetched. + * + * @param host the host of the remote node. + * @param port the port of the remote node. + * @param execId the executor id. + * @param blockIds block ids to fetch. + * @param listener the listener to receive block fetching status. + * @param tempShuffleFileManager TempShuffleFileManager to create and clean temp shuffle files. + * If it's not null, the remote blocks will be streamed + * into temp shuffle files to reduce the memory usage, otherwise, + * they will be kept in memory. 
*/ public abstract void fetchBlocks( String host, @@ -42,5 +51,5 @@ public abstract void fetchBlocks( String execId, String[] blockIds, BlockFetchingListener listener, - File[] shuffleFiles); + TempShuffleFileManager tempShuffleFileManager); } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempShuffleFileManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempShuffleFileManager.java new file mode 100644 index 0000000000000..84a5ed6a276bd --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempShuffleFileManager.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.io.File; + +/** + * A manager to create temp shuffle block files to reduce the memory usage and also clean temp + * files when they won't be used any more. + */ +public interface TempShuffleFileManager { + + /** Create a temp shuffle block file. */ + File createTempShuffleFile(); + + /** + * Register a temp shuffle file to clean up when it won't be used any more. Return whether the + * file is registered successfully. If `false`, the caller should clean up the file by itself. 
+ */ + boolean registerTempShuffleFileToClean(File file); +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java index 0c054fc5db8f4..65cd550bbf672 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java @@ -204,7 +204,7 @@ public void onBlockFetchFailure(String blockId, Throwable t) { String[] blockIds = { "shuffle_2_3_4", "shuffle_6_7_8" }; OneForOneBlockFetcher fetcher = - new OneForOneBlockFetcher(client1, "app-2", "0", blockIds, listener, conf, null); + new OneForOneBlockFetcher(client1, "app-2", "0", blockIds, listener, conf); fetcher.start(); blockFetchLatch.await(); checkSecurityException(exception.get()); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java index 61d82214e7d30..dc947a619bf02 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java @@ -131,7 +131,7 @@ private static BlockFetchingListener fetchBlocks(LinkedHashMap { diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index 6860214c7fe39..fe5fd2da039bb 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -17,7 +17,7 @@ package org.apache.spark.network -import java.io.{Closeable, File} +import java.io.Closeable import java.nio.ByteBuffer import scala.concurrent.{Future, Promise} @@ -26,7 +26,7 @@ import scala.reflect.ClassTag import org.apache.spark.internal.Logging import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} -import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient} +import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient, TempShuffleFileManager} import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.util.ThreadUtils @@ -68,7 +68,7 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo execId: String, blockIds: Array[String], listener: BlockFetchingListener, - shuffleFiles: Array[File]): Unit + tempShuffleFileManager: TempShuffleFileManager): Unit /** * Upload a single block to a remote node, available only after [[init]] is invoked. 
@@ -101,7 +101,7 @@ abstract class BlockTransferService extends ShuffleClient with Closeable with Lo ret.flip() result.success(new NioManagedBuffer(ret)) } - }, shuffleFiles = null) + }, tempShuffleFileManager = null) ThreadUtils.awaitResult(result.future, Duration.Inf) } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index b13a9c681e543..30ff93897f98a 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -17,7 +17,6 @@ package org.apache.spark.network.netty -import java.io.File import java.nio.ByteBuffer import scala.collection.JavaConverters._ @@ -30,7 +29,7 @@ import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap, TransportClientFactory} import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap} import org.apache.spark.network.server._ -import org.apache.spark.network.shuffle.{BlockFetchingListener, OneForOneBlockFetcher, RetryingBlockFetcher} +import org.apache.spark.network.shuffle.{BlockFetchingListener, OneForOneBlockFetcher, RetryingBlockFetcher, TempShuffleFileManager} import org.apache.spark.network.shuffle.protocol.UploadBlock import org.apache.spark.network.util.JavaUtils import org.apache.spark.serializer.JavaSerializer @@ -90,14 +89,14 @@ private[spark] class NettyBlockTransferService( execId: String, blockIds: Array[String], listener: BlockFetchingListener, - shuffleFiles: Array[File]): Unit = { + tempShuffleFileManager: TempShuffleFileManager): Unit = { logTrace(s"Fetch blocks from $host:$port (executor id $execId)") try { val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter { override def createAndStart(blockIds: Array[String], listener: BlockFetchingListener) { val client = clientFactory.createClient(host, port) - new OneForOneBlockFetcher(client, appId, execId, blockIds.toArray, listener, - transportConf, shuffleFiles).start() + new OneForOneBlockFetcher(client, appId, execId, blockIds, listener, + transportConf, tempShuffleFileManager).start() } } diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index bded3a1e4eb54..7e2bcf7683bff 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -28,7 +28,7 @@ import scala.collection.mutable.{ArrayBuffer, HashSet, Queue} import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} -import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient} +import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient, TempShuffleFileManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBufferOutputStream @@ -66,7 +66,7 @@ final class ShuffleBlockFetcherIterator( maxReqsInFlight: Int, maxReqSizeShuffleToMem: Long, detectCorrupt: Boolean) - extends Iterator[(BlockId, InputStream)] with Logging { + extends Iterator[(BlockId, InputStream)] with TempShuffleFileManager with Logging { import 
ShuffleBlockFetcherIterator._ @@ -135,7 +135,8 @@ final class ShuffleBlockFetcherIterator( * A set to store the files used for shuffling remote huge blocks. Files in this set will be * deleted when cleanup. This is a layer of defensiveness against disk file leaks. */ - val shuffleFilesSet = mutable.HashSet[File]() + @GuardedBy("this") + private[this] val shuffleFilesSet = mutable.HashSet[File]() initialize() @@ -149,6 +150,19 @@ final class ShuffleBlockFetcherIterator( currentResult = null } + override def createTempShuffleFile(): File = { + blockManager.diskBlockManager.createTempLocalBlock()._2 + } + + override def registerTempShuffleFileToClean(file: File): Boolean = synchronized { + if (isZombie) { + false + } else { + shuffleFilesSet += file + true + } + } + /** * Mark the iterator as zombie, and release all buffers that haven't been deserialized yet. */ @@ -173,7 +187,7 @@ final class ShuffleBlockFetcherIterator( } shuffleFilesSet.foreach { file => if (!file.delete()) { - logInfo("Failed to cleanup shuffle fetch temp file " + file.getAbsolutePath()); + logWarning("Failed to cleanup shuffle fetch temp file " + file.getAbsolutePath()) } } } @@ -218,12 +232,8 @@ final class ShuffleBlockFetcherIterator( // already encrypted and compressed over the wire(w.r.t. the related configs), we can just fetch // the data and write it to file directly. if (req.size > maxReqSizeShuffleToMem) { - val shuffleFiles = blockIds.map { _ => - blockManager.diskBlockManager.createTempLocalBlock()._2 - }.toArray - shuffleFilesSet ++= shuffleFiles shuffleClient.fetchBlocks(address.host, address.port, address.executorId, blockIds.toArray, - blockFetchingListener, shuffleFiles) + blockFetchingListener, this) } else { shuffleClient.fetchBlocks(address.host, address.port, address.executorId, blockIds.toArray, blockFetchingListener, null) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 9d7a8696818f3..24308a26d7811 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.storage -import java.io.File import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer @@ -41,7 +40,7 @@ import org.apache.spark.memory.UnifiedMemoryManager import org.apache.spark.network.{BlockDataManager, BlockTransferService} import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.netty.NettyBlockTransferService -import org.apache.spark.network.shuffle.BlockFetchingListener +import org.apache.spark.network.shuffle.{BlockFetchingListener, TempShuffleFileManager} import org.apache.spark.rpc.RpcEnv import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.security.{CryptoStreamUtils, EncryptionFunSuite} @@ -1267,7 +1266,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE execId: String, blockIds: Array[String], listener: BlockFetchingListener, - shuffleFiles: Array[File]): Unit = { + tempShuffleFileManager: TempShuffleFileManager): Unit = { listener.onBlockFetchSuccess("mockBlockId", new NioManagedBuffer(ByteBuffer.allocate(1))) } diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index 559b3faab8fd2..6a70cedf769b8 100644 --- 
a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -33,7 +33,7 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.{SparkFunSuite, TaskContext} import org.apache.spark.network._ import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} -import org.apache.spark.network.shuffle.BlockFetchingListener +import org.apache.spark.network.shuffle.{BlockFetchingListener, TempShuffleFileManager} import org.apache.spark.network.util.LimitedInputStream import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.util.Utils @@ -432,12 +432,12 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT val remoteBlocks = Map[BlockId, ManagedBuffer]( ShuffleBlockId(0, 0, 0) -> createMockManagedBuffer()) val transfer = mock(classOf[BlockTransferService]) - var shuffleFiles: Array[File] = null + var tempShuffleFileManager: TempShuffleFileManager = null when(transfer.fetchBlocks(any(), any(), any(), any(), any(), any())) .thenAnswer(new Answer[Unit] { override def answer(invocation: InvocationOnMock): Unit = { val listener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] - shuffleFiles = invocation.getArguments()(5).asInstanceOf[Array[File]] + tempShuffleFileManager = invocation.getArguments()(5).asInstanceOf[TempShuffleFileManager] Future { listener.onBlockFetchSuccess( ShuffleBlockId(0, 0, 0).toString, remoteBlocks(ShuffleBlockId(0, 0, 0))) @@ -466,13 +466,13 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT fetchShuffleBlock(blocksByAddress1) // `maxReqSizeShuffleToMem` is 200, which is greater than the block size 100, so don't fetch // shuffle block to disk. - assert(shuffleFiles === null) + assert(tempShuffleFileManager == null) val blocksByAddress2 = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 300L)).toSeq)) fetchShuffleBlock(blocksByAddress2) // `maxReqSizeShuffleToMem` is 200, which is smaller than the block size 300, so fetch // shuffle block to disk. - assert(shuffleFiles != null) + assert(tempShuffleFileManager != null) } } From a05edf454a67261c89f0f2ecd1fe46bb8cebc257 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Mon, 10 Jul 2017 09:26:42 -0700 Subject: [PATCH 1080/1204] [SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows ## What changes were proposed in this pull request? Updating numOutputRows metric was missing from one return path of LeftAnti SortMergeJoin. ## How was this patch tested? Non-zero output rows manually seen in metrics. Author: Juliusz Sompolski Closes #18494 from juliuszsompolski/SPARK-21272. 
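For context, a small end-to-end sketch of the kind of query affected (illustrative only; the table names and values are made up, and broadcast joins are disabled so the anti join is planned as a SortMergeJoin):

```scala
import spark.implicits._

// Disable broadcast joins so the equi-join below uses the sort-merge path.
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1L)

Seq((1, 1), (2, 2), (3, 1), (4, 2)).toDF("a", "b").createOrReplaceTempView("l")
Seq((3, 1), (4, 2)).toDF("a", "b").createOrReplaceTempView("r")

// Rows of l with no match in r survive the anti join (here: a = 1 and a = 2).
val df = spark.sql("SELECT * FROM l ANTI JOIN r ON l.a = r.a")
df.collect()

// The SortMergeJoin node's "number of output rows" SQL metric now counts these rows;
// previously the early-return path for unmatched streamed rows skipped the update.
```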
--- .../sql/execution/joins/SortMergeJoinExec.scala | 1 + .../spark/sql/execution/metric/SQLMetricsSuite.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 26fb6103953fc..a77201534b94e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -290,6 +290,7 @@ case class SortMergeJoinExec( currentLeftRow = smjScanner.getStreamedRow val currentRightMatches = smjScanner.getBufferedMatches if (currentRightMatches == null || currentRightMatches.length == 0) { + numOutputRows += 1 return true } var found = false diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index e544245588f46..79d1fbfa3f072 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -288,6 +288,18 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { } } + test("SortMergeJoin(left-anti) metrics") { + val anti = testData2.filter("a > 2") + withTempView("antiData") { + anti.createOrReplaceTempView("antiData") + val df = spark.sql( + "SELECT * FROM testData2 ANTI JOIN antiData ON testData2.a = antiData.a") + testSparkPlanMetrics(df, 1, Map( + 0L -> ("SortMergeJoin", Map("number of output rows" -> 4L))) + ) + } + } + test("save metrics") { withTempPath { file => val previousExecutionIds = spark.sharedState.listener.executionIdToData.keySet From 73df649f10596d44f8415a4f04f5449c4c611fb6 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Mon, 10 Jul 2017 13:51:31 -0700 Subject: [PATCH 1081/1204] foo --- .../apache/spark/sql/hive/HiveMetastoreCatalog.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 638eb37a8c596..8f3807ec9d45b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -274,11 +274,17 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log logicalRelation }) } else { - val rootPath = Option(metastoreRelation.hiveQlTable.getDataLocation.toString) - val paths: Seq[Path] = if (fileType != "parquet") { Seq(new Path(rootPath.orNull)) } else { - selectParquetLocationDirectories(metastoreRelation.tableName, rootPath) + val rootPath = metastoreRelation.hiveQlTable.getDataLocation +// val rootPath = Option(metastoreRelation.hiveQlTable.getDataLocation.toString) +// assert(!rootPath.get.startsWith("file:")) + val paths: Seq[Path] = if (fileType != "parquet") { Seq(rootPath) } else { + selectParquetLocationDirectories(metastoreRelation.tableName, Option(rootPath.toString)) .map{ s => new Path(s) } } + // scalastyle:off + println("\n\n\n***MEH") + paths.foreach { p => println(p.toString) } + println("\n\n\n") withTableCreationLock(tableIdentifier, { val cached = getCached(tableIdentifier, paths, From edcd9fbc92683753d55ed0c69f391bf3bed59da4 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: 
Tue, 11 Jul 2017 11:26:17 +0800 Subject: [PATCH 1082/1204] [SPARK-21369][CORE] Don't use Scala Tuple2 in common/network-* ## What changes were proposed in this pull request? Remove all usages of Scala Tuple2 from common/network-* projects. Otherwise, Yarn users cannot use `spark.reducer.maxReqSizeShuffleToMem`. ## How was this patch tested? Jenkins. Author: Shixiong Zhu Closes #18593 from zsxwing/SPARK-21369. (cherry picked from commit 833eab2c9bd273ee9577fbf9e480d3e3a4b7d203) Signed-off-by: Wenchen Fan --- common/network-common/pom.xml | 3 ++- .../client/TransportResponseHandler.java | 20 +++++++++---------- .../server/OneForOneStreamManager.java | 17 +++++----------- common/network-shuffle/pom.xml | 1 + common/network-yarn/pom.xml | 1 + 5 files changed, 19 insertions(+), 23 deletions(-) diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7577253dd0390..303e25f7e4547 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -90,7 +90,8 @@ org.apache.spark spark-tags_${scala.binary.version} - + test +
    diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 2d17f95b0c44f..b94f34004e6f4 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1115,7 +1115,7 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> from pyspark.ml.linalg import Vectors >>> df = spark.createDataFrame([ ... (1.0, Vectors.dense(1.0), 1.0), - ... (0.0, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"]) + ... (1e-40, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"]) >>> aftsr = AFTSurvivalRegression() >>> model = aftsr.fit(df) >>> model.predict(Vectors.dense(6.3)) @@ -1123,12 +1123,12 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi >>> model.predictQuantiles(Vectors.dense(6.3)) DenseVector([0.0101, 0.0513, 0.1054, 0.2877, 0.6931, 1.3863, 2.3026, 2.9957, 4.6052]) >>> model.transform(df).show() - +-----+---------+------+----------+ - |label| features|censor|prediction| - +-----+---------+------+----------+ - | 1.0| [1.0]| 1.0| 1.0| - | 0.0|(1,[],[])| 0.0| 1.0| - +-----+---------+------+----------+ + +-------+---------+------+----------+ + | label| features|censor|prediction| + +-------+---------+------+----------+ + | 1.0| [1.0]| 1.0| 1.0| + |1.0E-40|(1,[],[])| 0.0| 1.0| + +-------+---------+------+----------+ ... >>> aftsr_path = temp_path + "/aftsr" >>> aftsr.save(aftsr_path) From f6d56d2f1c377000921effea2b1faae15f9cae82 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Tue, 8 Aug 2017 23:49:33 -0700 Subject: [PATCH 1138/1204] [SPARK-21596][SS] Ensure places calling HDFSMetadataLog.get check the return value Same PR as #18799 but for branch 2.2. Main discussion the other PR. -------- When I was investigating a flaky test, I realized that many places don't check the return value of `HDFSMetadataLog.get(batchId: Long): Option[T]`. When a batch is supposed to be there, the caller just ignores None rather than throwing an error. If some bug causes a query doesn't generate a batch metadata file, this behavior will hide it and allow the query continuing to run and finally delete metadata logs and make it hard to debug. This PR ensures that places calling HDFSMetadataLog.get always check the return value. Jenkins Author: Shixiong Zhu Closes #18890 from tdas/SPARK-21596-2.2. 
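A short sketch of the consistency check this change introduces (it mirrors the new unit test and is illustrative only):

```scala
import scala.util.Try
import org.apache.spark.sql.execution.streaming.HDFSMetadataLog.verifyBatchIds

// Batch ids read back from the metadata log must be contiguous and cover the
// requested range.
verifyBatchIds(Seq(1L, 2L, 3L), Some(1L), Some(3L))   // ok

// A gap (batch 3 missing) now fails fast with an IllegalStateException instead of
// being silently ignored by callers that discarded a None returned from get().
assert(Try(verifyBatchIds(Seq(1L, 2L, 4L, 5L), Some(1L), Some(5L))).isFailure)
```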
--- .../streaming/CompactibleFileStreamLog.scala | 24 +++++--- .../streaming/FileStreamSourceLog.scala | 5 +- .../execution/streaming/HDFSMetadataLog.scala | 57 ++++++++++++++++++- .../execution/streaming/StreamExecution.scala | 17 ++++-- .../streaming/HDFSMetadataLogSuite.scala | 17 ++++++ .../sql/streaming/FileStreamSourceSuite.scala | 1 + 6 files changed, 104 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala index 408c8f81f17ba..77bc0ba5548dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala @@ -169,13 +169,15 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( */ private def compact(batchId: Long, logs: Array[T]): Boolean = { val validBatches = getValidBatchesBeforeCompactionBatch(batchId, compactInterval) - val allLogs = validBatches.flatMap(batchId => super.get(batchId)).flatten ++ logs - if (super.add(batchId, compactLogs(allLogs).toArray)) { - true - } else { - // Return false as there is another writer. - false - } + val allLogs = validBatches.map { id => + super.get(id).getOrElse { + throw new IllegalStateException( + s"${batchIdToPath(id)} doesn't exist when compacting batch $batchId " + + s"(compactInterval: $compactInterval)") + } + }.flatten ++ logs + // Return false as there is another writer. + super.add(batchId, compactLogs(allLogs).toArray) } /** @@ -190,7 +192,13 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( if (latestId >= 0) { try { val logs = - getAllValidBatches(latestId, compactInterval).flatMap(id => super.get(id)).flatten + getAllValidBatches(latestId, compactInterval).map { id => + super.get(id).getOrElse { + throw new IllegalStateException( + s"${batchIdToPath(id)} doesn't exist " + + s"(latestId: $latestId, compactInterval: $compactInterval)") + } + }.flatten return compactLogs(logs).toArray } catch { case e: IOException => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala index 33e6a1d5d6e18..8628471fdb925 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala @@ -115,7 +115,10 @@ class FileStreamSourceLog( Map.empty[Long, Option[Array[FileEntry]]] } - (existedBatches ++ retrievedBatches).map(i => i._1 -> i._2.get).toArray.sortBy(_._1) + val batches = + (existedBatches ++ retrievedBatches).map(i => i._1 -> i._2.get).toArray.sortBy(_._1) + HDFSMetadataLog.verifyBatchIds(batches.map(_._1), startId, endId) + batches } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 46bfc297931fb..5f8973fd09460 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -123,7 +123,7 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: serialize(metadata, output) return Some(tempPath) } finally { - 
IOUtils.closeQuietly(output) + output.close() } } catch { case e: FileAlreadyExistsException => @@ -211,13 +211,17 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } override def get(startId: Option[Long], endId: Option[Long]): Array[(Long, T)] = { + assert(startId.isEmpty || endId.isEmpty || startId.get <= endId.get) val files = fileManager.list(metadataPath, batchFilesFilter) val batchIds = files .map(f => pathToBatchId(f.getPath)) .filter { batchId => (endId.isEmpty || batchId <= endId.get) && (startId.isEmpty || batchId >= startId.get) - } - batchIds.sorted.map(batchId => (batchId, get(batchId))).filter(_._2.isDefined).map { + }.sorted + + verifyBatchIds(batchIds, startId, endId) + + batchIds.map(batchId => (batchId, get(batchId))).filter(_._2.isDefined).map { case (batchId, metadataOption) => (batchId, metadataOption.get) } @@ -437,4 +441,51 @@ object HDFSMetadataLog { } } } + + /** + * Verify if batchIds are continuous and between `startId` and `endId`. + * + * @param batchIds the sorted ids to verify. + * @param startId the start id. If it's set, batchIds should start with this id. + * @param endId the start id. If it's set, batchIds should end with this id. + */ + def verifyBatchIds(batchIds: Seq[Long], startId: Option[Long], endId: Option[Long]): Unit = { + // Verify that we can get all batches between `startId` and `endId`. + if (startId.isDefined || endId.isDefined) { + if (batchIds.isEmpty) { + throw new IllegalStateException(s"batch ${startId.orElse(endId).get} doesn't exist") + } + if (startId.isDefined) { + val minBatchId = batchIds.head + assert(minBatchId >= startId.get) + if (minBatchId != startId.get) { + val missingBatchIds = startId.get to minBatchId + throw new IllegalStateException( + s"batches (${missingBatchIds.mkString(", ")}) don't exist " + + s"(startId: $startId, endId: $endId)") + } + } + + if (endId.isDefined) { + val maxBatchId = batchIds.last + assert(maxBatchId <= endId.get) + if (maxBatchId != endId.get) { + val missingBatchIds = maxBatchId to endId.get + throw new IllegalStateException( + s"batches (${missingBatchIds.mkString(", ")}) don't exist " + + s"(startId: $startId, endId: $endId)") + } + } + } + + if (batchIds.nonEmpty) { + val minBatchId = batchIds.head + val maxBatchId = batchIds.last + val missingBatchIds = (minBatchId to maxBatchId).toSet -- batchIds + if (missingBatchIds.nonEmpty) { + throw new IllegalStateException(s"batches (${missingBatchIds.mkString(", ")}) " + + s"don't exist (startId: $startId, endId: $endId)") + } + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 63c4dc17fddc5..16db353eef54c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -429,7 +429,10 @@ class StreamExecution( availableOffsets = nextOffsets.toStreamProgress(sources) /* Initialize committed offsets to a committed batch, which at this * is the second latest batch id in the offset log. 
*/ - offsetLog.get(latestBatchId - 1).foreach { secondLatestBatchId => + if (latestBatchId != 0) { + val secondLatestBatchId = offsetLog.get(latestBatchId - 1).getOrElse { + throw new IllegalStateException(s"batch ${latestBatchId - 1} doesn't exist") + } committedOffsets = secondLatestBatchId.toStreamProgress(sources) } @@ -568,10 +571,14 @@ class StreamExecution( // Now that we've updated the scheduler's persistent checkpoint, it is safe for the // sources to discard data from the previous batch. - val prevBatchOff = offsetLog.get(currentBatchId - 1) - if (prevBatchOff.isDefined) { - prevBatchOff.get.toStreamProgress(sources).foreach { - case (src, off) => src.commit(off) + if (currentBatchId != 0) { + val prevBatchOff = offsetLog.get(currentBatchId - 1) + if (prevBatchOff.isDefined) { + prevBatchOff.get.toStreamProgress(sources).foreach { + case (src, off) => src.commit(off) + } + } else { + throw new IllegalStateException(s"batch $currentBatchId doesn't exist") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 7689bc03a4ccf..48e70e48b1799 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -259,6 +259,23 @@ class HDFSMetadataLogSuite extends SparkFunSuite with SharedSQLContext { fm.rename(path2, path3) } } + + test("verifyBatchIds") { + import HDFSMetadataLog.verifyBatchIds + verifyBatchIds(Seq(1L, 2L, 3L), Some(1L), Some(3L)) + verifyBatchIds(Seq(1L), Some(1L), Some(1L)) + verifyBatchIds(Seq(1L, 2L, 3L), None, Some(3L)) + verifyBatchIds(Seq(1L, 2L, 3L), Some(1L), None) + verifyBatchIds(Seq(1L, 2L, 3L), None, None) + + intercept[IllegalStateException](verifyBatchIds(Seq(), Some(1L), None)) + intercept[IllegalStateException](verifyBatchIds(Seq(), None, Some(1L))) + intercept[IllegalStateException](verifyBatchIds(Seq(), Some(1L), Some(1L))) + intercept[IllegalStateException](verifyBatchIds(Seq(2, 3, 4), Some(1L), None)) + intercept[IllegalStateException](verifyBatchIds(Seq(2, 3, 4), None, Some(5L))) + intercept[IllegalStateException](verifyBatchIds(Seq(2, 3, 4), Some(1L), Some(5L))) + intercept[IllegalStateException](verifyBatchIds(Seq(1, 2, 4, 5), Some(1L), Some(5L))) + } } /** FakeFileSystem to test fallback of the HDFSMetadataLog from FileContext to FileSystem API */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 2108b118bf059..e2ec690d90e52 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -1314,6 +1314,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { val metadataLog = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, dir.getAbsolutePath) assert(metadataLog.add(0, Array(FileEntry(s"$scheme:///file1", 100L, 0)))) + assert(metadataLog.add(1, Array(FileEntry(s"$scheme:///file2", 200L, 0)))) val newSource = new FileStreamSource(spark, s"$scheme:///", "parquet", StructType(Nil), Nil, dir.getAbsolutePath, Map.empty) From 3ca55eaafee8f4216eb5466021a97604713033a1 Mon Sep 17 00:00:00 2001 From: 10087686 Date: Wed, 9 Aug 2017 18:45:38 +0800 Subject: [PATCH 1139/1204] [SPARK-21663][TESTS] 
test("remote fetch below max RPC message size") should call masterTracker.stop() in MapOutputTrackerSuite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 10087686 ## What changes were proposed in this pull request? After Unit tests end,there should be call masterTracker.stop() to free resource; (Please fill in changes proposed in this fix) ## How was this patch tested? Run Unit tests; (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: 10087686 Closes #18867 from wangjiaochun/mapout. (cherry picked from commit 6426adffaf152651c30d481bb925d5025fd6130a) Signed-off-by: Wenchen Fan --- .../test/scala/org/apache/spark/MapOutputTrackerSuite.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 71bedda5ac894..ca94fd1560b3f 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -175,7 +175,8 @@ class MapOutputTrackerSuite extends SparkFunSuite { val masterTracker = newTrackerMaster(newConf) val rpcEnv = createRpcEnv("spark") val masterEndpoint = new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, newConf) - rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, masterEndpoint) + masterTracker.trackerEndpoint = + rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, masterEndpoint) // Message size should be ~123B, and no exception should be thrown masterTracker.registerShuffle(10, 1) @@ -190,7 +191,7 @@ class MapOutputTrackerSuite extends SparkFunSuite { verify(rpcCallContext, timeout(30000)).reply(any()) assert(0 == masterTracker.getNumCachedSerializedBroadcast) -// masterTracker.stop() // this throws an exception + masterTracker.stop() rpcEnv.shutdown() } From c909496983314b48dd4d8587e586b553b04ff0ce Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Thu, 10 Aug 2017 18:56:25 -0700 Subject: [PATCH 1140/1204] [SPARK-21699][SQL] Remove unused getTableOption in ExternalCatalog ## What changes were proposed in this pull request? This patch removes the unused SessionCatalog.getTableMetadataOption and ExternalCatalog. getTableOption. ## How was this patch tested? Removed the test case. Author: Reynold Xin Closes #18912 from rxin/remove-getTableOption. 
(cherry picked from commit 584c7f14370cdfafdc6cd554b2760b7ce7709368) Signed-off-by: Reynold Xin --- .../sql/catalyst/catalog/ExternalCatalog.scala | 2 -- .../sql/catalyst/catalog/InMemoryCatalog.scala | 4 ---- .../sql/catalyst/catalog/SessionCatalog.scala | 17 +++-------------- .../catalyst/catalog/SessionCatalogSuite.scala | 11 ----------- .../spark/sql/hive/HiveExternalCatalog.scala | 4 ---- 5 files changed, 3 insertions(+), 35 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index 974ef900e2eed..18644b088c12b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -162,8 +162,6 @@ abstract class ExternalCatalog def getTable(db: String, table: String): CatalogTable - def getTableOption(db: String, table: String): Option[CatalogTable] - def tableExists(db: String, table: String): Boolean def listTables(db: String): Seq[String] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 864ee485d8f0d..bf8542c716eba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -315,10 +315,6 @@ class InMemoryCatalog( catalog(db).tables(table).table } - override def getTableOption(db: String, table: String): Option[CatalogTable] = synchronized { - if (!tableExists(db, table)) None else Option(catalog(db).tables(table).table) - } - override def tableExists(db: String, table: String): Boolean = synchronized { requireDbExists(db) catalog(db).tables.contains(table) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 57006bfaf9b69..8d9fb4c452fc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -388,9 +388,10 @@ class SessionCatalog( /** * Retrieve the metadata of an existing permanent table/view. If no database is specified, - * assume the table/view is in the current database. If the specified table/view is not found - * in the database then a [[NoSuchTableException]] is thrown. + * assume the table/view is in the current database. */ + @throws[NoSuchDatabaseException] + @throws[NoSuchTableException] def getTableMetadata(name: TableIdentifier): CatalogTable = { val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) val table = formatTableName(name.table) @@ -399,18 +400,6 @@ class SessionCatalog( externalCatalog.getTable(db, table) } - /** - * Retrieve the metadata of an existing metastore table. - * If no database is specified, assume the table is in the current database. - * If the specified table is not found in the database then return None if it doesn't exist. 
- */ - def getTableMetadataOption(name: TableIdentifier): Option[CatalogTable] = { - val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) - val table = formatTableName(name.table) - requireDbExists(db) - externalCatalog.getTableOption(db, table) - } - /** * Load files stored in given path into an existing metastore table. * If no database is specified, assume the table is in the current database. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index be8903000a0d1..5ee729ef5cfad 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -498,17 +498,6 @@ abstract class SessionCatalogSuite extends PlanTest { } } - test("get option of table metadata") { - withBasicCatalog { catalog => - assert(catalog.getTableMetadataOption(TableIdentifier("tbl1", Some("db2"))) - == Option(catalog.externalCatalog.getTable("db2", "tbl1"))) - assert(catalog.getTableMetadataOption(TableIdentifier("unknown_table", Some("db2"))).isEmpty) - intercept[NoSuchDatabaseException] { - catalog.getTableMetadataOption(TableIdentifier("tbl1", Some("unknown_db"))) - } - } - } - test("lookup table relation") { withBasicCatalog { catalog => val tempTable1 = Range(1, 10, 1, 10) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 6b0f2b4cea385..9fea0c6249700 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -648,10 +648,6 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat restoreTableMetadata(getRawTable(db, table)) } - override def getTableOption(db: String, table: String): Option[CatalogTable] = withClient { - client.getTableOption(db, table).map(restoreTableMetadata) - } - /** * Restores table metadata from the table properties. This method is kind of a opposite version * of [[createTable]]. From 406eb1c2ee670c2f14f2737c32c9aa0b8d35bf7c Mon Sep 17 00:00:00 2001 From: Tejas Patil Date: Fri, 11 Aug 2017 22:01:00 +0200 Subject: [PATCH 1141/1204] [SPARK-21595] Separate thresholds for buffering and spilling in ExternalAppendOnlyUnsafeRowArray ## What changes were proposed in this pull request? [SPARK-21595](https://issues.apache.org/jira/browse/SPARK-21595) reported that there is excessive spilling to disk due to default spill threshold for `ExternalAppendOnlyUnsafeRowArray` being quite small for WINDOW operator. Old behaviour of WINDOW operator (pre https://github.com/apache/spark/pull/16909) would hold data in an array for first 4096 records post which it would switch to `UnsafeExternalSorter` and start spilling to disk after reaching `spark.shuffle.spill.numElementsForceSpillThreshold` (or earlier if there was paucity of memory due to excessive consumers). Currently the (switch from in-memory to `UnsafeExternalSorter`) and (`UnsafeExternalSorter` spilling to disk) for `ExternalAppendOnlyUnsafeRowArray` is controlled by a single threshold. This PR aims to separate that to have more granular control. ## How was this patch tested? Added unit tests Author: Tejas Patil Closes #18843 from tejasapatil/SPARK-21595. 
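To illustrate how the two knobs are meant to interact after this change (the values below are purely illustrative, and only the window-operator pair is shown):

```scala
// Rows are buffered in a plain in-memory array up to the first threshold; beyond it the
// buffer switches to UnsafeExternalSorter, which spills to disk once the second
// threshold is reached (or earlier under memory pressure).
spark.conf.set("spark.sql.windowExec.buffer.in.memory.threshold", 4096)
spark.conf.set("spark.sql.windowExec.buffer.spill.threshold", 1000000)

// Equivalent pairs exist for the other users of ExternalAppendOnlyUnsafeRowArray:
// spark.sql.sortMergeJoinExec.buffer.{in.memory.threshold, spill.threshold}
// spark.sql.cartesianProductExec.buffer.{in.memory.threshold, spill.threshold}
```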
(cherry picked from commit 94439997d57875838a8283c543f9b44705d3a503) Signed-off-by: Herman van Hovell --- .../apache/spark/sql/internal/SQLConf.scala | 41 ++++++- .../ExternalAppendOnlyUnsafeRowArray.scala | 28 ++--- .../joins/CartesianProductExec.scala | 12 +- .../execution/joins/SortMergeJoinExec.scala | 24 +++- .../sql/execution/window/WindowExec.scala | 4 +- .../org/apache/spark/sql/JoinSuite.scala | 3 +- ...nalAppendOnlyUnsafeRowArrayBenchmark.scala | 7 +- ...xternalAppendOnlyUnsafeRowArraySuite.scala | 103 +++++++++++------- .../execution/SQLWindowFunctionSuite.scala | 3 +- 9 files changed, 155 insertions(+), 70 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0e2259487d2c1..79398fb3ec78f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -774,24 +774,47 @@ object SQLConf { .stringConf .createWithDefaultFunction(() => TimeZone.getDefault.getID) + val WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD = + buildConf("spark.sql.windowExec.buffer.in.memory.threshold") + .internal() + .doc("Threshold for number of rows guaranteed to be held in memory by the window operator") + .intConf + .createWithDefault(4096) + val WINDOW_EXEC_BUFFER_SPILL_THRESHOLD = buildConf("spark.sql.windowExec.buffer.spill.threshold") .internal() - .doc("Threshold for number of rows buffered in window operator") + .doc("Threshold for number of rows to be spilled by window operator") .intConf - .createWithDefault(4096) + .createWithDefault(UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD.toInt) + + val SORT_MERGE_JOIN_EXEC_BUFFER_IN_MEMORY_THRESHOLD = + buildConf("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold") + .internal() + .doc("Threshold for number of rows guaranteed to be held in memory by the sort merge " + + "join operator") + .intConf + .createWithDefault(Int.MaxValue) val SORT_MERGE_JOIN_EXEC_BUFFER_SPILL_THRESHOLD = buildConf("spark.sql.sortMergeJoinExec.buffer.spill.threshold") .internal() - .doc("Threshold for number of rows buffered in sort merge join operator") + .doc("Threshold for number of rows to be spilled by sort merge join operator") .intConf - .createWithDefault(Int.MaxValue) + .createWithDefault(UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD.toInt) + + val CARTESIAN_PRODUCT_EXEC_BUFFER_IN_MEMORY_THRESHOLD = + buildConf("spark.sql.cartesianProductExec.buffer.in.memory.threshold") + .internal() + .doc("Threshold for number of rows guaranteed to be held in memory by the cartesian " + + "product operator") + .intConf + .createWithDefault(4096) val CARTESIAN_PRODUCT_EXEC_BUFFER_SPILL_THRESHOLD = buildConf("spark.sql.cartesianProductExec.buffer.spill.threshold") .internal() - .doc("Threshold for number of rows buffered in cartesian product operator") + .doc("Threshold for number of rows to be spilled by cartesian product operator") .intConf .createWithDefault(UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD.toInt) @@ -1037,11 +1060,19 @@ class SQLConf extends Serializable with Logging { def joinReorderDPStarFilter: Boolean = getConf(SQLConf.JOIN_REORDER_DP_STAR_FILTER) + def windowExecBufferInMemoryThreshold: Int = getConf(WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD) + def windowExecBufferSpillThreshold: Int = getConf(WINDOW_EXEC_BUFFER_SPILL_THRESHOLD) + def sortMergeJoinExecBufferInMemoryThreshold: Int = + 
getConf(SORT_MERGE_JOIN_EXEC_BUFFER_IN_MEMORY_THRESHOLD) + def sortMergeJoinExecBufferSpillThreshold: Int = getConf(SORT_MERGE_JOIN_EXEC_BUFFER_SPILL_THRESHOLD) + def cartesianProductExecBufferInMemoryThreshold: Int = + getConf(CARTESIAN_PRODUCT_EXEC_BUFFER_IN_MEMORY_THRESHOLD) + def cartesianProductExecBufferSpillThreshold: Int = getConf(CARTESIAN_PRODUCT_EXEC_BUFFER_SPILL_THRESHOLD) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala index 458ac4ba3637c..01c9c65e5399d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala @@ -31,16 +31,16 @@ import org.apache.spark.storage.BlockManager import org.apache.spark.util.collection.unsafe.sort.{UnsafeExternalSorter, UnsafeSorterIterator} /** - * An append-only array for [[UnsafeRow]]s that spills content to disk when there a predefined - * threshold of rows is reached. + * An append-only array for [[UnsafeRow]]s that strictly keeps content in an in-memory array + * until [[numRowsInMemoryBufferThreshold]] is reached post which it will switch to a mode which + * would flush to disk after [[numRowsSpillThreshold]] is met (or before if there is + * excessive memory consumption). Setting these threshold involves following trade-offs: * - * Setting spill threshold faces following trade-off: - * - * - If the spill threshold is too high, the in-memory array may occupy more memory than is - * available, resulting in OOM. - * - If the spill threshold is too low, we spill frequently and incur unnecessary disk writes. - * This may lead to a performance regression compared to the normal case of using an - * [[ArrayBuffer]] or [[Array]]. + * - If [[numRowsInMemoryBufferThreshold]] is too high, the in-memory array may occupy more memory + * than is available, resulting in OOM. + * - If [[numRowsSpillThreshold]] is too low, data will be spilled frequently and lead to + * excessive disk writes. This may lead to a performance regression compared to the normal case + * of using an [[ArrayBuffer]] or [[Array]]. 
*/ private[sql] class ExternalAppendOnlyUnsafeRowArray( taskMemoryManager: TaskMemoryManager, @@ -49,9 +49,10 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( taskContext: TaskContext, initialSize: Int, pageSizeBytes: Long, + numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) extends Logging { - def this(numRowsSpillThreshold: Int) { + def this(numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) { this( TaskContext.get().taskMemoryManager(), SparkEnv.get.blockManager, @@ -59,11 +60,12 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( TaskContext.get(), 1024, SparkEnv.get.memoryManager.pageSizeBytes, + numRowsInMemoryBufferThreshold, numRowsSpillThreshold) } private val initialSizeOfInMemoryBuffer = - Math.min(DefaultInitialSizeOfInMemoryBuffer, numRowsSpillThreshold) + Math.min(DefaultInitialSizeOfInMemoryBuffer, numRowsInMemoryBufferThreshold) private val inMemoryBuffer = if (initialSizeOfInMemoryBuffer > 0) { new ArrayBuffer[UnsafeRow](initialSizeOfInMemoryBuffer) @@ -102,11 +104,11 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( } def add(unsafeRow: UnsafeRow): Unit = { - if (numRows < numRowsSpillThreshold) { + if (numRows < numRowsInMemoryBufferThreshold) { inMemoryBuffer += unsafeRow.copy() } else { if (spillableArray == null) { - logInfo(s"Reached spill threshold of $numRowsSpillThreshold rows, switching to " + + logInfo(s"Reached spill threshold of $numRowsInMemoryBufferThreshold rows, switching to " + s"${classOf[UnsafeExternalSorter].getName}") // We will not sort the rows, so prefixComparator and recordComparator are null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala index f380986951317..4d261dd422bc5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala @@ -35,11 +35,12 @@ class UnsafeCartesianRDD( left : RDD[UnsafeRow], right : RDD[UnsafeRow], numFieldsOfRight: Int, + inMemoryBufferThreshold: Int, spillThreshold: Int) extends CartesianRDD[UnsafeRow, UnsafeRow](left.sparkContext, left, right) { override def compute(split: Partition, context: TaskContext): Iterator[(UnsafeRow, UnsafeRow)] = { - val rowArray = new ExternalAppendOnlyUnsafeRowArray(spillThreshold) + val rowArray = new ExternalAppendOnlyUnsafeRowArray(inMemoryBufferThreshold, spillThreshold) val partition = split.asInstanceOf[CartesianPartition] rdd2.iterator(partition.s2, context).foreach(rowArray.add) @@ -71,9 +72,12 @@ case class CartesianProductExec( val leftResults = left.execute().asInstanceOf[RDD[UnsafeRow]] val rightResults = right.execute().asInstanceOf[RDD[UnsafeRow]] - val spillThreshold = sqlContext.conf.cartesianProductExecBufferSpillThreshold - - val pair = new UnsafeCartesianRDD(leftResults, rightResults, right.output.size, spillThreshold) + val pair = new UnsafeCartesianRDD( + leftResults, + rightResults, + right.output.size, + sqlContext.conf.cartesianProductExecBufferInMemoryThreshold, + sqlContext.conf.cartesianProductExecBufferSpillThreshold) pair.mapPartitionsWithIndexInternal { (index, iter) => val joiner = GenerateUnsafeRowJoiner.create(left.schema, right.schema) val filtered = if (condition.isDefined) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index bd5b633119a04..70dada8b63ae9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -130,9 +130,14 @@ case class SortMergeJoinExec( sqlContext.conf.sortMergeJoinExecBufferSpillThreshold } + private def getInMemoryThreshold: Int = { + sqlContext.conf.sortMergeJoinExecBufferInMemoryThreshold + } + protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") val spillThreshold = getSpillThreshold + val inMemoryThreshold = getInMemoryThreshold left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) => val boundCondition: (InternalRow) => Boolean = { condition.map { cond => @@ -158,6 +163,7 @@ case class SortMergeJoinExec( keyOrdering, RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), + inMemoryThreshold, spillThreshold ) private[this] val joinRow = new JoinedRow @@ -201,6 +207,7 @@ case class SortMergeJoinExec( keyOrdering, streamedIter = RowIterator.fromScala(leftIter), bufferedIter = RowIterator.fromScala(rightIter), + inMemoryThreshold, spillThreshold ) val rightNullRow = new GenericInternalRow(right.output.length) @@ -214,6 +221,7 @@ case class SortMergeJoinExec( keyOrdering, streamedIter = RowIterator.fromScala(rightIter), bufferedIter = RowIterator.fromScala(leftIter), + inMemoryThreshold, spillThreshold ) val leftNullRow = new GenericInternalRow(left.output.length) @@ -247,6 +255,7 @@ case class SortMergeJoinExec( keyOrdering, RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), + inMemoryThreshold, spillThreshold ) private[this] val joinRow = new JoinedRow @@ -281,6 +290,7 @@ case class SortMergeJoinExec( keyOrdering, RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), + inMemoryThreshold, spillThreshold ) private[this] val joinRow = new JoinedRow @@ -322,6 +332,7 @@ case class SortMergeJoinExec( keyOrdering, RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), + inMemoryThreshold, spillThreshold ) private[this] val joinRow = new JoinedRow @@ -420,8 +431,10 @@ case class SortMergeJoinExec( val clsName = classOf[ExternalAppendOnlyUnsafeRowArray].getName val spillThreshold = getSpillThreshold + val inMemoryThreshold = getInMemoryThreshold - ctx.addMutableState(clsName, matches, s"$matches = new $clsName($spillThreshold);") + ctx.addMutableState(clsName, matches, + s"$matches = new $clsName($inMemoryThreshold, $spillThreshold);") // Copy the left keys as class members so they could be used in next function call. val matchedKeyVars = copyKeys(ctx, leftKeyVars) @@ -626,6 +639,9 @@ case class SortMergeJoinExec( * @param streamedIter an input whose rows will be streamed. * @param bufferedIter an input whose rows will be buffered to construct sequences of rows that * have the same join key. 
+ * @param inMemoryThreshold Threshold for number of rows guaranteed to be held in memory by + * internal buffer + * @param spillThreshold Threshold for number of rows to be spilled by internal buffer */ private[joins] class SortMergeJoinScanner( streamedKeyGenerator: Projection, @@ -633,7 +649,8 @@ private[joins] class SortMergeJoinScanner( keyOrdering: Ordering[InternalRow], streamedIter: RowIterator, bufferedIter: RowIterator, - bufferThreshold: Int) { + inMemoryThreshold: Int, + spillThreshold: Int) { private[this] var streamedRow: InternalRow = _ private[this] var streamedRowKey: InternalRow = _ private[this] var bufferedRow: InternalRow = _ @@ -644,7 +661,8 @@ private[joins] class SortMergeJoinScanner( */ private[this] var matchJoinKey: InternalRow = _ /** Buffered rows from the buffered side of the join. This is empty if there are no matches. */ - private[this] val bufferedMatches = new ExternalAppendOnlyUnsafeRowArray(bufferThreshold) + private[this] val bufferedMatches = + new ExternalAppendOnlyUnsafeRowArray(inMemoryThreshold, spillThreshold) // Initialization (note: do _not_ want to advance streamed here). advancedBufferedToRowWithNullFreeJoinKey() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 950a6794a74a3..b9c932ae21727 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -282,6 +282,7 @@ case class WindowExec( // Unwrap the expressions and factories from the map. val expressions = windowFrameExpressionFactoryPairs.flatMap(_._1) val factories = windowFrameExpressionFactoryPairs.map(_._2).toArray + val inMemoryThreshold = sqlContext.conf.windowExecBufferInMemoryThreshold val spillThreshold = sqlContext.conf.windowExecBufferSpillThreshold // Start processing. 
@@ -312,7 +313,8 @@ case class WindowExec( val inputFields = child.output.length val buffer: ExternalAppendOnlyUnsafeRowArray = - new ExternalAppendOnlyUnsafeRowArray(spillThreshold) + new ExternalAppendOnlyUnsafeRowArray(inMemoryThreshold, spillThreshold) + var bufferIterator: Iterator[UnsafeRow] = _ val windowFunctionResult = new SpecificInternalRow(expressions.map(_.dataType)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 1a66aa85f5a02..40bc1e9f13f81 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -665,7 +665,8 @@ class JoinSuite extends QueryTest with SharedSQLContext { test("test SortMergeJoin (with spill)") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1", - "spark.sql.sortMergeJoinExec.buffer.spill.threshold" -> "0") { + "spark.sql.sortMergeJoinExec.buffer.in.memory.threshold" -> "0", + "spark.sql.sortMergeJoinExec.buffer.spill.threshold" -> "1") { assertSpilled(sparkContext, "inner join") { checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala index 00c5f2550cbb1..a5adc3639ad64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala @@ -67,7 +67,10 @@ object ExternalAppendOnlyUnsafeRowArrayBenchmark { benchmark.addCase("ExternalAppendOnlyUnsafeRowArray") { _: Int => var sum = 0L for (_ <- 0L until iterations) { - val array = new ExternalAppendOnlyUnsafeRowArray(numSpillThreshold) + val array = new ExternalAppendOnlyUnsafeRowArray( + ExternalAppendOnlyUnsafeRowArray.DefaultInitialSizeOfInMemoryBuffer, + numSpillThreshold) + rows.foreach(x => array.add(x)) val iterator = array.generateIterator() @@ -143,7 +146,7 @@ object ExternalAppendOnlyUnsafeRowArrayBenchmark { benchmark.addCase("ExternalAppendOnlyUnsafeRowArray") { _: Int => var sum = 0L for (_ <- 0L until iterations) { - val array = new ExternalAppendOnlyUnsafeRowArray(numSpillThreshold) + val array = new ExternalAppendOnlyUnsafeRowArray(numSpillThreshold, numSpillThreshold) rows.foreach(x => array.add(x)) val iterator = array.generateIterator() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala index 53c41639942b4..ecc7264d79442 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArraySuite.scala @@ -31,7 +31,7 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar override def afterAll(): Unit = TaskContext.unset() - private def withExternalArray(spillThreshold: Int) + private def withExternalArray(inMemoryThreshold: Int, spillThreshold: Int) (f: ExternalAppendOnlyUnsafeRowArray => Unit): Unit = { sc = new SparkContext("local", "test", new SparkConf(false)) @@ -45,6 +45,7 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar taskContext, 1024, SparkEnv.get.memoryManager.pageSizeBytes, + inMemoryThreshold, 
spillThreshold) try f(array) finally { array.clear() @@ -109,9 +110,9 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar assert(getNumBytesSpilled > 0) } - test("insert rows less than the spillThreshold") { - val spillThreshold = 100 - withExternalArray(spillThreshold) { array => + test("insert rows less than the inMemoryThreshold") { + val (inMemoryThreshold, spillThreshold) = (100, 50) + withExternalArray(inMemoryThreshold, spillThreshold) { array => assert(array.isEmpty) val expectedValues = populateRows(array, 1) @@ -122,8 +123,8 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar // Add more rows (but not too many to trigger switch to [[UnsafeExternalSorter]]) // Verify that NO spill has happened - populateRows(array, spillThreshold - 1, expectedValues) - assert(array.length == spillThreshold) + populateRows(array, inMemoryThreshold - 1, expectedValues) + assert(array.length == inMemoryThreshold) assertNoSpill() val iterator2 = validateData(array, expectedValues) @@ -133,20 +134,42 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } } - test("insert rows more than the spillThreshold to force spill") { - val spillThreshold = 100 - withExternalArray(spillThreshold) { array => - val numValuesInserted = 20 * spillThreshold - + test("insert rows more than the inMemoryThreshold but less than spillThreshold") { + val (inMemoryThreshold, spillThreshold) = (10, 50) + withExternalArray(inMemoryThreshold, spillThreshold) { array => assert(array.isEmpty) - val expectedValues = populateRows(array, 1) - assert(array.length == 1) + val expectedValues = populateRows(array, inMemoryThreshold - 1) + assert(array.length == (inMemoryThreshold - 1)) + val iterator1 = validateData(array, expectedValues) + assertNoSpill() + + // Add more rows to trigger switch to [[UnsafeExternalSorter]] but not too many to cause a + // spill to happen. Verify that NO spill has happened + populateRows(array, spillThreshold - expectedValues.length - 1, expectedValues) + assert(array.length == spillThreshold - 1) + assertNoSpill() + + val iterator2 = validateData(array, expectedValues) + assert(!iterator2.hasNext) + assert(!iterator1.hasNext) + intercept[ConcurrentModificationException](iterator1.next()) + } + } + + test("insert rows enough to force spill") { + val (inMemoryThreshold, spillThreshold) = (20, 10) + withExternalArray(inMemoryThreshold, spillThreshold) { array => + assert(array.isEmpty) + val expectedValues = populateRows(array, inMemoryThreshold - 1) + assert(array.length == (inMemoryThreshold - 1)) val iterator1 = validateData(array, expectedValues) + assertNoSpill() - // Populate more rows to trigger spill. Verify that spill has happened - populateRows(array, numValuesInserted - 1, expectedValues) - assert(array.length == numValuesInserted) + // Add more rows to trigger switch to [[UnsafeExternalSorter]] and cause a spill to happen. 
+ // Verify that spill has happened + populateRows(array, 2, expectedValues) + assert(array.length == inMemoryThreshold + 1) assertSpill() val iterator2 = validateData(array, expectedValues) @@ -158,7 +181,7 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("iterator on an empty array should be empty") { - withExternalArray(spillThreshold = 10) { array => + withExternalArray(inMemoryThreshold = 4, spillThreshold = 10) { array => val iterator = array.generateIterator() assert(array.isEmpty) assert(array.length == 0) @@ -167,7 +190,7 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("generate iterator with negative start index") { - withExternalArray(spillThreshold = 2) { array => + withExternalArray(inMemoryThreshold = 100, spillThreshold = 56) { array => val exception = intercept[ArrayIndexOutOfBoundsException](array.generateIterator(startIndex = -10)) @@ -178,8 +201,8 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("generate iterator with start index exceeding array's size (without spill)") { - val spillThreshold = 2 - withExternalArray(spillThreshold) { array => + val (inMemoryThreshold, spillThreshold) = (20, 100) + withExternalArray(inMemoryThreshold, spillThreshold) { array => populateRows(array, spillThreshold / 2) val exception = @@ -191,8 +214,8 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("generate iterator with start index exceeding array's size (with spill)") { - val spillThreshold = 2 - withExternalArray(spillThreshold) { array => + val (inMemoryThreshold, spillThreshold) = (20, 100) + withExternalArray(inMemoryThreshold, spillThreshold) { array => populateRows(array, spillThreshold * 2) val exception = @@ -205,10 +228,10 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("generate iterator with custom start index (without spill)") { - val spillThreshold = 10 - withExternalArray(spillThreshold) { array => - val expectedValues = populateRows(array, spillThreshold) - val startIndex = spillThreshold / 2 + val (inMemoryThreshold, spillThreshold) = (20, 100) + withExternalArray(inMemoryThreshold, spillThreshold) { array => + val expectedValues = populateRows(array, inMemoryThreshold) + val startIndex = inMemoryThreshold / 2 val iterator = array.generateIterator(startIndex = startIndex) for (i <- startIndex until expectedValues.length) { checkIfValueExists(iterator, expectedValues(i)) @@ -217,8 +240,8 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("generate iterator with custom start index (with spill)") { - val spillThreshold = 10 - withExternalArray(spillThreshold) { array => + val (inMemoryThreshold, spillThreshold) = (20, 100) + withExternalArray(inMemoryThreshold, spillThreshold) { array => val expectedValues = populateRows(array, spillThreshold * 10) val startIndex = spillThreshold * 2 val iterator = array.generateIterator(startIndex = startIndex) @@ -229,7 +252,7 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("test iterator invalidation (without spill)") { - withExternalArray(spillThreshold = 10) { array => + withExternalArray(inMemoryThreshold = 10, spillThreshold = 100) { array => // insert 2 rows, iterate until the first row populateRows(array, 2) @@ -254,9 +277,9 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("test iterator invalidation 
(with spill)") { - val spillThreshold = 10 - withExternalArray(spillThreshold) { array => - // Populate enough rows so that spill has happens + val (inMemoryThreshold, spillThreshold) = (2, 10) + withExternalArray(inMemoryThreshold, spillThreshold) { array => + // Populate enough rows so that spill happens populateRows(array, spillThreshold * 2) assertSpill() @@ -281,7 +304,7 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("clear on an empty the array") { - withExternalArray(spillThreshold = 2) { array => + withExternalArray(inMemoryThreshold = 2, spillThreshold = 3) { array => val iterator = array.generateIterator() assert(!iterator.hasNext) @@ -299,10 +322,10 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar } test("clear array (without spill)") { - val spillThreshold = 10 - withExternalArray(spillThreshold) { array => + val (inMemoryThreshold, spillThreshold) = (10, 100) + withExternalArray(inMemoryThreshold, spillThreshold) { array => // Populate rows ... but not enough to trigger spill - populateRows(array, spillThreshold / 2) + populateRows(array, inMemoryThreshold / 2) assertNoSpill() // Clear the array @@ -311,21 +334,21 @@ class ExternalAppendOnlyUnsafeRowArraySuite extends SparkFunSuite with LocalSpar // Re-populate few rows so that there is no spill // Verify the data. Verify that there was no spill - val expectedValues = populateRows(array, spillThreshold / 3) + val expectedValues = populateRows(array, inMemoryThreshold / 2) validateData(array, expectedValues) assertNoSpill() // Populate more rows .. enough to not trigger a spill. // Verify the data. Verify that there was no spill - populateRows(array, spillThreshold / 3, expectedValues) + populateRows(array, inMemoryThreshold / 2, expectedValues) validateData(array, expectedValues) assertNoSpill() } } test("clear array (with spill)") { - val spillThreshold = 10 - withExternalArray(spillThreshold) { array => + val (inMemoryThreshold, spillThreshold) = (10, 20) + withExternalArray(inMemoryThreshold, spillThreshold) { array => // Populate enough rows to trigger spill populateRows(array, spillThreshold * 2) val bytesSpilled = getNumBytesSpilled diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala index a9f3fb355c775..a57514c256b90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala @@ -477,7 +477,8 @@ class SQLWindowFunctionSuite extends QueryTest with SharedSQLContext { |WINDOW w1 AS (ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDiNG AND CURRENT RoW) """.stripMargin) - withSQLConf("spark.sql.windowExec.buffer.spill.threshold" -> "1") { + withSQLConf("spark.sql.windowExec.buffer.in.memory.threshold" -> "1", + "spark.sql.windowExec.buffer.spill.threshold" -> "2") { assertSpilled(sparkContext, "test with low buffer spill threshold") { checkAnswer(actual, expected) } From 7b9807754fd43756ba852bf93590a5024f2aa129 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Mon, 14 Aug 2017 22:48:08 +0800 Subject: [PATCH 1142/1204] [SPARK-21563][CORE] Fix race condition when serializing TaskDescriptions and adding jars ## What changes were proposed in this pull request? 
Fix the race condition when serializing TaskDescriptions and adding jars by keeping the set of jars and files for a TaskSet constant across the lifetime of the TaskSet. Otherwise TaskDescription serialization can produce an invalid serialization when new file/jars are added concurrently as the TaskDescription is serialized. ## How was this patch tested? Additional unit test ensures jars/files contained in the TaskDescription remain constant throughout the lifetime of the TaskSet. Author: Andrew Ash Closes #18913 from ash211/SPARK-21563. (cherry picked from commit 6847e93cf427aa971dac1ea261c1443eebf4089e) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/SparkContext.scala | 7 ++++ .../spark/scheduler/TaskSetManager.scala | 8 +++-- .../spark/scheduler/TaskSetManagerSuite.scala | 34 ++++++++++++++++++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 1a2443f7ee78d..ef2b8aca6e1ed 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1495,6 +1495,8 @@ class SparkContext(config: SparkConf) extends Logging { /** * Add a file to be downloaded with this Spark job on every node. * + * If a file is added during execution, it will not be available until the next TaskSet starts. + * * @param path can be either a local file, a file in HDFS (or other Hadoop-supported * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, * use `SparkFiles.get(fileName)` to find its download location. @@ -1511,6 +1513,8 @@ class SparkContext(config: SparkConf) extends Logging { /** * Add a file to be downloaded with this Spark job on every node. * + * If a file is added during execution, it will not be available until the next TaskSet starts. + * * @param path can be either a local file, a file in HDFS (or other Hadoop-supported * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, * use `SparkFiles.get(fileName)` to find its download location. @@ -1797,6 +1801,9 @@ class SparkContext(config: SparkConf) extends Logging { /** * Adds a JAR dependency for all tasks to be executed on this `SparkContext` in the future. + * + * If a jar is added during execution, it will not be available until the next TaskSet starts. + * * @param path can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), * an HTTP, HTTPS or FTP URI, or local:/path for a file on every worker node. 
*/ diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 6ac76c86dead3..2f4e46c7ec8f1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -56,6 +56,10 @@ private[spark] class TaskSetManager( private val conf = sched.sc.conf + // SPARK-21563 make a copy of the jars/files so they are consistent across the TaskSet + private val addedJars = HashMap[String, Long](sched.sc.addedJars.toSeq: _*) + private val addedFiles = HashMap[String, Long](sched.sc.addedFiles.toSeq: _*) + // Quantile of tasks at which to start speculation val SPECULATION_QUANTILE = conf.getDouble("spark.speculation.quantile", 0.75) val SPECULATION_MULTIPLIER = conf.getDouble("spark.speculation.multiplier", 1.5) @@ -502,8 +506,8 @@ private[spark] class TaskSetManager( execId, taskName, index, - sched.sc.addedFiles, - sched.sc.addedJars, + addedFiles, + addedJars, task.localProperties, serializedTask) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index 807ad0a86ed2e..7d31a66aef096 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.Logging import org.apache.spark.serializer.SerializerInstance import org.apache.spark.storage.BlockManagerId -import org.apache.spark.util.{AccumulatorV2, ManualClock} +import org.apache.spark.util.{AccumulatorV2, ManualClock, Utils} class FakeDAGScheduler(sc: SparkContext, taskScheduler: FakeTaskScheduler) extends DAGScheduler(sc) { @@ -1182,6 +1182,38 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg verify(taskSetManagerSpy, times(1)).addPendingTask(anyInt()) } + test("SPARK-21563 context's added jars shouldn't change mid-TaskSet") { + sc = new SparkContext("local", "test") + val addedJarsPreTaskSet = Map[String, Long](sc.addedJars.toSeq: _*) + assert(addedJarsPreTaskSet.size === 0) + + sched = new FakeTaskScheduler(sc, ("exec1", "host1")) + val taskSet1 = FakeTask.createTaskSet(3) + val manager1 = new TaskSetManager(sched, taskSet1, MAX_TASK_FAILURES, clock = new ManualClock) + + // all tasks from the first taskset have the same jars + val taskOption1 = manager1.resourceOffer("exec1", "host1", NO_PREF) + assert(taskOption1.get.addedJars === addedJarsPreTaskSet) + val taskOption2 = manager1.resourceOffer("exec1", "host1", NO_PREF) + assert(taskOption2.get.addedJars === addedJarsPreTaskSet) + + // even with a jar added mid-TaskSet + val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar") + sc.addJar(jarPath.toString) + val addedJarsMidTaskSet = Map[String, Long](sc.addedJars.toSeq: _*) + assert(addedJarsPreTaskSet !== addedJarsMidTaskSet) + val taskOption3 = manager1.resourceOffer("exec1", "host1", NO_PREF) + // which should have the old version of the jars list + assert(taskOption3.get.addedJars === addedJarsPreTaskSet) + + // and then the jar does appear in the next TaskSet + val taskSet2 = FakeTask.createTaskSet(1) + val manager2 = new TaskSetManager(sched, taskSet2, MAX_TASK_FAILURES, clock = new ManualClock) + + val taskOption4 = manager2.resourceOffer("exec1", "host1", NO_PREF) + 
assert(taskOption4.get.addedJars === addedJarsMidTaskSet) + } + private def createTaskResult( id: Int, accumUpdates: Seq[AccumulatorV2[_, _]] = Seq.empty): DirectTaskResult[Int] = { From 48bacd36c673bcbe20dc2e119cddb2a61261a394 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Mon, 14 Aug 2017 15:06:55 -0700 Subject: [PATCH 1143/1204] [SPARK-21696][SS] Fix a potential issue that may generate partial snapshot files ## What changes were proposed in this pull request? Directly writing a snapshot file may generate a partial file. This PR changes it to write to a temp file then rename to the target file. ## How was this patch tested? Jenkins. Author: Shixiong Zhu Closes #18928 from zsxwing/SPARK-21696. (cherry picked from commit 282f00b410fdc4dc69b9d1f3cb3e2ba53cd85b8b) Signed-off-by: Tathagata Das --- .../streaming/state/HDFSBackedStateStoreProvider.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index fb2bf47d6e83b..ef48fffe1d980 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -448,9 +448,11 @@ private[state] class HDFSBackedStateStoreProvider( private def writeSnapshotFile(version: Long, map: MapType): Unit = { val fileToWrite = snapshotFile(version) + val tempFile = + new Path(fileToWrite.getParent, s"${fileToWrite.getName}.temp-${Random.nextLong}") var output: DataOutputStream = null Utils.tryWithSafeFinally { - output = compressStream(fs.create(fileToWrite, false)) + output = compressStream(fs.create(tempFile, false)) val iter = map.entrySet().iterator() while(iter.hasNext) { val entry = iter.next() @@ -465,6 +467,12 @@ private[state] class HDFSBackedStateStoreProvider( } { if (output != null) output.close() } + if (fs.exists(fileToWrite)) { + // Skip rename if the file is alreayd created. 
+ fs.delete(tempFile, true) + } else if (!fs.rename(tempFile, fileToWrite)) { + throw new IOException(s"Failed to rename $tempFile to $fileToWrite") + } logInfo(s"Written snapshot file for version $version of $this at $fileToWrite") } From 3a02a3c631c6fe1a51b19658fd60b995215c92d4 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Mon, 14 Aug 2017 15:22:17 -0700 Subject: [PATCH 1144/1204] ExchangeCoordinatorSuite cleanup --- .../spark/sql/execution/ExchangeCoordinatorSuite.scala | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala index 0f7b4b35daafa..0de1832a0fed9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExchangeCoordinatorSuite.scala @@ -17,14 +17,11 @@ package org.apache.spark.sql.execution -import scala.collection.mutable - import org.scalatest.BeforeAndAfterAll import org.apache.spark.{MapOutputStatistics, SparkConf, SparkFunSuite} import org.apache.spark.sql._ import org.apache.spark.sql.execution.exchange.{ExchangeCoordinator, ShuffleExchange} -import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -465,9 +462,6 @@ class ExchangeCoordinatorSuite extends SparkFunSuite with BeforeAndAfterAll { // Then, let's look at the number of post-shuffle partitions estimated // by the ExchangeCoordinator. - // scalastyle:off - println(join.queryExecution.executedPlan) - // scalastyle:on val exchanges = join.queryExecution.executedPlan.collect { case e: ShuffleExchange => e } @@ -542,7 +536,7 @@ class ExchangeCoordinatorSuite extends SparkFunSuite with BeforeAndAfterAll { minNumPostShufflePartitions match { case Some(_) => assert(coordinator.isDefined) - assert(newPartitioning.numPartitions === 5) + assert(newPartitioning.numPartitions === 3) case None => assert(coordinator.isDefined) } From d9c8e6223f6b31bfbca33b1064ead9720cfefa10 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 14 Aug 2017 22:29:15 -0700 Subject: [PATCH 1145/1204] [SPARK-21721][SQL] Clear FileSystem deleteOnExit cache when paths are successfully removed ## What changes were proposed in this pull request? We put staging path to delete into the deleteOnExit cache of `FileSystem` in case of the path can't be successfully removed. But when we successfully remove the path, we don't remove it from the cache. We should do it to avoid continuing grow the cache size. ## How was this patch tested? Added a test. Author: Liang-Chi Hsieh Closes #18934 from viirya/SPARK-21721. 
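For illustration only (not part of this patch), the cleanup pattern the change below applies to `InsertIntoHiveTable`, condensed into a standalone helper (the helper name is made up for this sketch): delete the staging path, and only when the delete actually succeeds drop the path from `FileSystem`'s deleteOnExit cache, leaving the JVM-exit hook in place as the fallback when the delete fails.

```scala
import org.apache.hadoop.fs.{FileSystem, Path}

// Sketch of the fix: a successfully deleted staging path no longer needs its
// delete-on-exit entry, so cancel it to keep the per-FileSystem cache from growing
// with every insert. If the delete fails, the entry stays and the path is still
// removed at JVM shutdown, as before.
def cleanupStagingDir(fs: FileSystem, stagingPath: Path): Unit = {
  if (fs.delete(stagingPath, true)) {
    fs.cancelDeleteOnExit(stagingPath)
  }
}
```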
(cherry picked from commit 4c3cf1cc5cdb400ceef447d366e9f395cd87b273) Signed-off-by: gatorsmile --- .../hive/execution/InsertIntoHiveTable.scala | 8 ++++++- .../sql/hive/execution/SQLQuerySuite.scala | 22 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 797481c879e7a..66ee5d4581e7e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -393,7 +393,13 @@ case class InsertIntoHiveTable( // Attempt to delete the staging directory and the inclusive files. If failed, the files are // expected to be dropped at the normal termination of VM since deleteOnExit is used. try { - createdTempDir.foreach { path => path.getFileSystem(hadoopConf).delete(path, true) } + createdTempDir.foreach { path => + val fs = path.getFileSystem(hadoopConf) + if (fs.delete(path, true)) { + // If we successfully delete the staging directory, remove it from FileSystem's cache. + fs.cancelDeleteOnExit(path) + } + } } catch { case NonFatal(e) => logWarning(s"Unable to delete staging directory: $stagingDir.\n" + e) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index c944f28d10ef4..002ddd44e3bdb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -20,10 +20,10 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import java.util.Locale +import java.util.{Locale, Set} import com.google.common.io.Files -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.TestUtils import org.apache.spark.sql._ @@ -2015,4 +2015,22 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(table.filter($"p" === "p1\" and q=\"q1").select($"a"), Row(4)) } } + + test("SPARK-21721: Clear FileSystem deleterOnExit cache if path is successfully removed") { + withTable("test21721") { + val deleteOnExitField = classOf[FileSystem].getDeclaredField("deleteOnExit") + deleteOnExitField.setAccessible(true) + + val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) + val setOfPath = deleteOnExitField.get(fs).asInstanceOf[Set[Path]] + + val testData = sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString)).toDF() + sql("CREATE TABLE test21721 (key INT, value STRING)") + val pathSizeToDeleteOnExit = setOfPath.size() + + (0 to 10).foreach(_ => testData.write.mode(SaveMode.Append).insertInto("test1")) + + assert(setOfPath.size() == pathSizeToDeleteOnExit) + } + } } From f1accc8511cf034fa4edee0c0a5747def0df04a2 Mon Sep 17 00:00:00 2001 From: Jan Vrsovsky Date: Wed, 16 Aug 2017 08:21:42 +0100 Subject: [PATCH 1146/1204] [SPARK-21723][ML] Fix writing LibSVM (key not found: numFeatures) Check the option "numFeatures" only when reading LibSVM, not when writing. When writing, Spark was raising an exception. After the change it will ignore the option completely. liancheng HyukjinKwon (Maybe the usage should be forbidden when writing, in a major version change?). 
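For illustration only (not part of this patch), the kind of round trip that used to fail on the write side: a DataFrame assembled from scratch carries no `numFeatures` metadata on its vector column, so writing it as LibSVM raised `key not found: numFeatures`; after this change the writer ignores the option and the reader infers the feature count itself. A minimal sketch, assuming a `spark-shell` session; the output path is a made-up example.

```scala
import org.apache.spark.ml.linalg.Vectors
import spark.implicits._

// Vector column built directly from data, so it has no numFeatures metadata attached.
val df = Seq(
  (1.0, Vectors.sparse(3, Seq((0, 2.0), (1, 3.0)))),
  (0.0, Vectors.sparse(3, Seq((2, 4.0))))
).toDF("label", "features")

// Previously this write threw; the coalesce mirrors the existing single-file limitation.
df.coalesce(1).write.format("libsvm").mode("overwrite").save("/tmp/libsvm-roundtrip")

// Reading it back infers numFeatures from the data itself.
spark.read.format("libsvm").load("/tmp/libsvm-roundtrip").show()
```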
Manual test, that loading and writing LibSVM files work fine, both with and without the numFeatures option. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Jan Vrsovsky Closes #18872 from ProtD/master. (cherry picked from commit 8321c141f63a911a97ec183aefa5ff75a338c051) Signed-off-by: Sean Owen --- .../ml/source/libsvm/LibSVMRelation.scala | 8 ++--- .../source/libsvm/LibSVMRelationSuite.scala | 36 +++++++++++++++---- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala index f68847a664b69..1840d07abc531 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala @@ -72,12 +72,12 @@ private[libsvm] class LibSVMFileFormat extends TextBasedFileFormat with DataSour override def toString: String = "LibSVM" - private def verifySchema(dataSchema: StructType): Unit = { + private def verifySchema(dataSchema: StructType, forWriting: Boolean): Unit = { if ( dataSchema.size != 2 || !dataSchema(0).dataType.sameType(DataTypes.DoubleType) || !dataSchema(1).dataType.sameType(new VectorUDT()) || - !(dataSchema(1).metadata.getLong(LibSVMOptions.NUM_FEATURES).toInt > 0) + !(forWriting || dataSchema(1).metadata.getLong(LibSVMOptions.NUM_FEATURES).toInt > 0) ) { throw new IOException(s"Illegal schema for libsvm data, schema=$dataSchema") } @@ -119,7 +119,7 @@ private[libsvm] class LibSVMFileFormat extends TextBasedFileFormat with DataSour job: Job, options: Map[String, String], dataSchema: StructType): OutputWriterFactory = { - verifySchema(dataSchema) + verifySchema(dataSchema, true) new OutputWriterFactory { override def newInstance( path: String, @@ -142,7 +142,7 @@ private[libsvm] class LibSVMFileFormat extends TextBasedFileFormat with DataSour filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { - verifySchema(dataSchema) + verifySchema(dataSchema, false) val numFeatures = dataSchema("features").metadata.getLong(LibSVMOptions.NUM_FEATURES).toInt assert(numFeatures > 0) diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala index e164d279f3f02..30cb60f4f1913 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala @@ -19,13 +19,16 @@ package org.apache.spark.ml.source.libsvm import java.io.{File, IOException} import java.nio.charset.StandardCharsets +import java.util.List import com.google.common.io.Files import org.apache.spark.SparkFunSuite import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} +import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.types.{DoubleType, StructField, StructType} import org.apache.spark.util.Utils @@ -41,10 +44,10 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { |0 |0 2:4.0 4:5.0 6:6.0 """.stripMargin - val dir = Utils.createDirectory(tempDir.getCanonicalPath, "data") + val dir = Utils.createTempDir() val file = new File(dir, "part-00000") Files.write(lines, 
file, StandardCharsets.UTF_8) - path = dir.toURI.toString + path = dir.getPath } override def afterAll(): Unit = { @@ -101,12 +104,12 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { test("write libsvm data and read it again") { val df = spark.read.format("libsvm").load(path) - val tempDir2 = new File(tempDir, "read_write_test") - val writepath = tempDir2.toURI.toString + val writePath = Utils.createTempDir().getPath + // TODO: Remove requirement to coalesce by supporting multiple reads. - df.coalesce(1).write.format("libsvm").mode(SaveMode.Overwrite).save(writepath) + df.coalesce(1).write.format("libsvm").mode(SaveMode.Overwrite).save(writePath) - val df2 = spark.read.format("libsvm").load(writepath) + val df2 = spark.read.format("libsvm").load(writePath) val row1 = df2.first() val v = row1.getAs[SparseVector](1) assert(v == Vectors.sparse(6, Seq((0, 1.0), (2, 2.0), (4, 3.0)))) @@ -119,6 +122,27 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { } } + test("write libsvm data from scratch and read it again") { + val rawData = new java.util.ArrayList[Row]() + rawData.add(Row(1.0, Vectors.sparse(3, Seq((0, 2.0), (1, 3.0))))) + rawData.add(Row(4.0, Vectors.sparse(3, Seq((0, 5.0), (2, 6.0))))) + + val struct = StructType( + StructField("labelFoo", DoubleType, false) :: + StructField("featuresBar", VectorType, false) :: Nil + ) + val df = spark.sqlContext.createDataFrame(rawData, struct) + + val writePath = Utils.createTempDir().getPath + + df.coalesce(1).write.format("libsvm").mode(SaveMode.Overwrite).save(writePath) + + val df2 = spark.read.format("libsvm").load(writePath) + val row1 = df2.first() + val v = row1.getAs[SparseVector](1) + assert(v == Vectors.sparse(3, Seq((0, 2.0), (1, 3.0)))) + } + test("select features from libsvm relation") { val df = spark.read.format("libsvm").load(path) df.select("features").rdd.map { case Row(d: Vector) => d }.first From f5ede0d558e3db51867d8c1c0a12c8fb286c797c Mon Sep 17 00:00:00 2001 From: John Lee Date: Wed, 16 Aug 2017 09:44:09 -0500 Subject: [PATCH 1147/1204] [SPARK-21656][CORE] spark dynamic allocation should not idle timeout executors when tasks still to run ## What changes were proposed in this pull request? Right now spark lets go of executors when they are idle for the 60s (or configurable time). I have seen spark let them go when they are idle but they were really needed. I have seen this issue when the scheduler was waiting to get node locality but that takes longer than the default idle timeout. In these jobs the number of executors goes down really small (less than 10) but there are still like 80,000 tasks to run. We should consider not allowing executors to idle timeout if they are still needed according to the number of tasks to be run. ## How was this patch tested? Tested by manually adding executors to `executorsIdsToBeRemoved` list and seeing if those executors were removed when there are a lot of tasks and a high `numExecutorsTarget` value. Code used In `ExecutorAllocationManager.start()` ``` start_time = clock.getTimeMillis() ``` In `ExecutorAllocationManager.schedule()` ``` val executorIdsToBeRemoved = ArrayBuffer[String]() if ( now > start_time + 1000 * 60 * 2) { logInfo("--- REMOVING 1/2 of the EXECUTORS ---") start_time += 1000 * 60 * 100 var counter = 0 for (x <- executorIds) { counter += 1 if (counter == 2) { counter = 0 executorIdsToBeRemoved += x } } } Author: John Lee Closes #18874 from yoonlee95/SPARK-21656. 
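For illustration only (not part of this patch), the added guard condensed into a standalone predicate; the names mirror the `ExecutorAllocationManager` diff below, and the real code applies this on top of the existing `canBeKilled` check. An idle executor is released only if doing so keeps the pool at or above both the configured minimum and the current target derived from the tasks still to run.

```scala
// Sketch of the removal condition after this change: removing one more idle executor
// must not take us below either the minimum executor count or the current target,
// which tracks how many executors the outstanding tasks still need.
def mayRemoveIdleExecutor(
    newExecutorTotal: Int,
    minNumExecutors: Int,
    numExecutorsTarget: Int): Boolean = {
  newExecutorTotal - 1 >= minNumExecutors && newExecutorTotal - 1 >= numExecutorsTarget
}
```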
(cherry picked from commit adf005dabe3b0060033e1eeaedbab31a868efc8c) Signed-off-by: Tom Graves --- .../spark/ExecutorAllocationManager.scala | 5 +- .../ExecutorAllocationManagerSuite.scala | 119 +++++++++++++----- 2 files changed, 89 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index fcc72ff49276d..bb5eb7f8a9e01 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -410,7 +410,10 @@ private[spark] class ExecutorAllocationManager( executors.foreach { executorIdToBeRemoved => if (newExecutorTotal - 1 < minNumExecutors) { logDebug(s"Not removing idle executor $executorIdToBeRemoved because there are only " + - s"$newExecutorTotal executor(s) left (limit $minNumExecutors)") + s"$newExecutorTotal executor(s) left (minimum number of executor limit $minNumExecutors)") + } else if (newExecutorTotal - 1 < numExecutorsTarget) { + logDebug(s"Not removing idle executor $executorIdToBeRemoved because there are only " + + s"$newExecutorTotal executor(s) left (number of executor target $numExecutorsTarget)") } else if (canBeKilled(executorIdToBeRemoved)) { executorIdsToBeRemoved += executorIdToBeRemoved newExecutorTotal -= 1 diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 4ea42fc7d5c22..b9ce71a0c5254 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -314,8 +314,47 @@ class ExecutorAllocationManagerSuite assert(executorsPendingToRemove(manager).isEmpty) } + test ("Removing with various numExecutorsTarget condition") { + sc = createSparkContext(5, 12, 5) + val manager = sc.executorAllocationManager.get + + sc.listenerBus.postToAll(SparkListenerStageSubmitted(createStageInfo(0, 8))) + + // Remove when numExecutorsTarget is the same as the current number of executors + assert(addExecutors(manager) === 1) + assert(addExecutors(manager) === 2) + (1 to 8).map { i => createTaskInfo(i, i, s"$i") }.foreach { + info => sc.listenerBus.postToAll(SparkListenerTaskStart(0, 0, info)) } + assert(executorIds(manager).size === 8) + assert(numExecutorsTarget(manager) === 8) + assert(maxNumExecutorsNeeded(manager) == 8) + assert(!removeExecutor(manager, "1")) // won't work since numExecutorsTarget == numExecutors + + // Remove executors when numExecutorsTarget is lower than current number of executors + (1 to 3).map { i => createTaskInfo(i, i, s"$i") }.foreach { + info => sc.listenerBus.postToAll(SparkListenerTaskEnd(0, 0, null, Success, info, null)) } + adjustRequestedExecutors(manager) + assert(executorIds(manager).size === 8) + assert(numExecutorsTarget(manager) === 5) + assert(maxNumExecutorsNeeded(manager) == 5) + assert(removeExecutor(manager, "1")) + assert(removeExecutors(manager, Seq("2", "3"))=== Seq("2", "3")) + onExecutorRemoved(manager, "1") + onExecutorRemoved(manager, "2") + onExecutorRemoved(manager, "3") + + // numExecutorsTarget is lower than minNumExecutors + sc.listenerBus.postToAll( + SparkListenerTaskEnd(0, 0, null, Success, createTaskInfo(4, 4, "4"), null)) + assert(executorIds(manager).size === 5) + assert(numExecutorsTarget(manager) === 5) + assert(maxNumExecutorsNeeded(manager) == 4) + assert(!removeExecutor(manager, "4")) 
// lower limit + assert(addExecutors(manager) === 0) // upper limit + } + test ("interleaving add and remove") { - sc = createSparkContext(5, 10, 5) + sc = createSparkContext(5, 12, 5) val manager = sc.executorAllocationManager.get sc.listenerBus.postToAll(SparkListenerStageSubmitted(createStageInfo(0, 1000))) @@ -331,52 +370,59 @@ class ExecutorAllocationManagerSuite onExecutorAdded(manager, "7") onExecutorAdded(manager, "8") assert(executorIds(manager).size === 8) + assert(numExecutorsTarget(manager) === 8) - // Remove until limit - assert(removeExecutor(manager, "1")) - assert(removeExecutors(manager, Seq("2", "3")) === Seq("2", "3")) - assert(!removeExecutor(manager, "4")) // lower limit reached - assert(!removeExecutor(manager, "5")) - onExecutorRemoved(manager, "1") - onExecutorRemoved(manager, "2") - onExecutorRemoved(manager, "3") - assert(executorIds(manager).size === 5) - // Add until limit - assert(addExecutors(manager) === 2) // upper limit reached - assert(addExecutors(manager) === 0) - assert(!removeExecutor(manager, "4")) // still at lower limit - assert((manager, Seq("5")) !== Seq("5")) + // Remove when numTargetExecutors is equal to the current number of executors + assert(!removeExecutor(manager, "1")) + assert(removeExecutors(manager, Seq("2", "3")) !== Seq("2", "3")) + + // Remove until limit onExecutorAdded(manager, "9") onExecutorAdded(manager, "10") onExecutorAdded(manager, "11") onExecutorAdded(manager, "12") - onExecutorAdded(manager, "13") - assert(executorIds(manager).size === 10) + assert(executorIds(manager).size === 12) + assert(numExecutorsTarget(manager) === 8) - // Remove succeeds again, now that we are no longer at the lower limit - assert(removeExecutors(manager, Seq("4", "5", "6")) === Seq("4", "5", "6")) - assert(removeExecutor(manager, "7")) - assert(executorIds(manager).size === 10) - assert(addExecutors(manager) === 0) + assert(removeExecutor(manager, "1")) + assert(removeExecutors(manager, Seq("2", "3", "4")) === Seq("2", "3", "4")) + assert(!removeExecutor(manager, "5")) // lower limit reached + assert(!removeExecutor(manager, "6")) + onExecutorRemoved(manager, "1") + onExecutorRemoved(manager, "2") + onExecutorRemoved(manager, "3") onExecutorRemoved(manager, "4") - onExecutorRemoved(manager, "5") assert(executorIds(manager).size === 8) - // Number of executors pending restarts at 1 - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 0) - assert(executorIds(manager).size === 8) - onExecutorRemoved(manager, "6") - onExecutorRemoved(manager, "7") + // Add until limit + assert(!removeExecutor(manager, "7")) // still at lower limit + assert((manager, Seq("8")) !== Seq("8")) + onExecutorAdded(manager, "13") onExecutorAdded(manager, "14") onExecutorAdded(manager, "15") - assert(executorIds(manager).size === 8) - assert(addExecutors(manager) === 0) // still at upper limit onExecutorAdded(manager, "16") + assert(executorIds(manager).size === 12) + + // Remove succeeds again, now that we are no longer at the lower limit + assert(removeExecutors(manager, Seq("5", "6", "7")) === Seq("5", "6", "7")) + assert(removeExecutor(manager, "8")) + assert(executorIds(manager).size === 12) + onExecutorRemoved(manager, "5") + onExecutorRemoved(manager, "6") + assert(executorIds(manager).size === 10) + assert(numExecutorsToAdd(manager) === 4) + onExecutorRemoved(manager, "9") + onExecutorRemoved(manager, "10") + assert(addExecutors(manager) === 4) // at upper limit onExecutorAdded(manager, "17") + onExecutorAdded(manager, "18") 
assert(executorIds(manager).size === 10) - assert(numExecutorsTarget(manager) === 10) + assert(addExecutors(manager) === 0) // still at upper limit + onExecutorAdded(manager, "19") + onExecutorAdded(manager, "20") + assert(executorIds(manager).size === 12) + assert(numExecutorsTarget(manager) === 12) } test("starting/canceling add timer") { @@ -915,12 +961,17 @@ class ExecutorAllocationManagerSuite onExecutorAdded(manager, "third") onExecutorAdded(manager, "fourth") onExecutorAdded(manager, "fifth") - assert(executorIds(manager) === Set("first", "second", "third", "fourth", "fifth")) + onExecutorAdded(manager, "sixth") + onExecutorAdded(manager, "seventh") + onExecutorAdded(manager, "eighth") + assert(executorIds(manager) === Set("first", "second", "third", "fourth", "fifth", + "sixth", "seventh", "eighth")) removeExecutor(manager, "first") removeExecutors(manager, Seq("second", "third")) assert(executorsPendingToRemove(manager) === Set("first", "second", "third")) - assert(executorIds(manager) === Set("first", "second", "third", "fourth", "fifth")) + assert(executorIds(manager) === Set("first", "second", "third", "fourth", "fifth", + "sixth", "seventh", "eighth")) // Cluster manager lost will make all the live executors lost, so here simulate this behavior From 2a9697593add425efa15d51afb501b6236a78e26 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 16 Aug 2017 09:36:33 -0700 Subject: [PATCH 1148/1204] [SPARK-18464][SQL][BACKPORT] support old table which doesn't store schema in table properties backport https://github.com/apache/spark/pull/18907 to branch 2.2 Author: Wenchen Fan Closes #18963 from cloud-fan/backport. --- .../sql/catalyst/catalog/SessionCatalog.scala | 7 +-- .../sql/catalyst/catalog/interface.scala | 22 ++++++--- .../catalog/SessionCatalogSuite.scala | 4 +- .../apache/spark/sql/DataFrameWriter.scala | 9 ++-- .../scala/org/apache/spark/sql/Dataset.scala | 4 +- .../execution/OptimizeMetadataOnlyQuery.scala | 6 +-- .../datasources/DataSourceStrategy.scala | 47 +++++++++++-------- .../sql/execution/datasources/rules.scala | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 4 +- .../spark/sql/hive/HiveMetastoreCatalog.scala | 6 +-- .../spark/sql/hive/HiveStrategies.scala | 16 +++---- .../hive/execution/HiveTableScanExec.scala | 6 +-- .../sql/hive/MetastoreDataSourcesSuite.scala | 1 + .../spark/sql/hive/StatisticsSuite.scala | 10 ++-- .../sql/hive/execution/SQLQuerySuite.scala | 10 ++-- .../spark/sql/hive/orc/OrcQuerySuite.scala | 4 +- .../apache/spark/sql/hive/parquetSuites.scala | 4 +- 17 files changed, 86 insertions(+), 76 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 8d9fb4c452fc9..df8f9aae1e1fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -679,12 +679,7 @@ class SessionCatalog( child = parser.parsePlan(viewText)) SubqueryAlias(table, child) } else { - val tableRelation = CatalogRelation( - metadata, - // we assume all the columns are nullable. 
- metadata.dataSchema.asNullable.toAttributes, - metadata.partitionSchema.asNullable.toAttributes) - SubqueryAlias(table, tableRelation) + SubqueryAlias(table, UnresolvedCatalogRelation(metadata)) } } else { SubqueryAlias(table, tempTables(table)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 976d78749bfdc..5c8e5709a34f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -397,11 +397,22 @@ object CatalogTypes { type TablePartitionSpec = Map[String, String] } +/** + * A placeholder for a table relation, which will be replaced by concrete relation like + * `LogicalRelation` or `HiveTableRelation`, during analysis. + */ +case class UnresolvedCatalogRelation(tableMeta: CatalogTable) extends LeafNode { + assert(tableMeta.identifier.database.isDefined) + override lazy val resolved: Boolean = false + override def output: Seq[Attribute] = Nil +} /** - * A [[LogicalPlan]] that represents a table. + * A `LogicalPlan` that represents a hive table. + * + * TODO: remove this after we completely make hive as a data source. */ -case class CatalogRelation( +case class HiveTableRelation( tableMeta: CatalogTable, dataCols: Seq[AttributeReference], partitionCols: Seq[AttributeReference]) extends LeafNode with MultiInstanceRelation { @@ -415,7 +426,7 @@ case class CatalogRelation( def isPartitioned: Boolean = partitionCols.nonEmpty override def equals(relation: Any): Boolean = relation match { - case other: CatalogRelation => tableMeta == other.tableMeta && output == other.output + case other: HiveTableRelation => tableMeta == other.tableMeta && output == other.output case _ => false } @@ -434,15 +445,12 @@ case class CatalogRelation( )) override def computeStats(conf: SQLConf): Statistics = { - // For data source tables, we will create a `LogicalRelation` and won't call this method, for - // hive serde tables, we will always generate a statistics. - // TODO: unify the table stats generation. 
tableMeta.stats.map(_.toPlanStats(output)).getOrElse { throw new IllegalStateException("table stats must be specified.") } } - override def newInstance(): LogicalPlan = copy( + override def newInstance(): HiveTableRelation = copy( dataCols = dataCols.map(_.newInstance()), partitionCols = partitionCols.map(_.newInstance())) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index 5ee729ef5cfad..9c1b6385d049b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -506,14 +506,14 @@ abstract class SessionCatalogSuite extends PlanTest { catalog.setCurrentDatabase("db2") // If we explicitly specify the database, we'll look up the relation in that database assert(catalog.lookupRelation(TableIdentifier("tbl1", Some("db2"))).children.head - .asInstanceOf[CatalogRelation].tableMeta == metastoreTable1) + .asInstanceOf[UnresolvedCatalogRelation].tableMeta == metastoreTable1) // Otherwise, we'll first look up a temporary table with the same name assert(catalog.lookupRelation(TableIdentifier("tbl1")) == SubqueryAlias("tbl1", tempTable1)) // Then, if that does not exist, look up the relation in the current database catalog.dropTable(TableIdentifier("tbl1"), ignoreIfNotExists = false, purge = false) assert(catalog.lookupRelation(TableIdentifier("tbl1")).children.head - .asInstanceOf[CatalogRelation].tableMeta == metastoreTable1) + .asInstanceOf[UnresolvedCatalogRelation].tableMeta == metastoreTable1) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index b71c5eb843eec..0259fffeab2db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.annotation.InterfaceStability import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation} -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogRelation, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation, SaveIntoDataSourceCommand} @@ -372,8 +372,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { // Get all input data source or hive relations of the query. 
val srcRelations = df.logicalPlan.collect { case LogicalRelation(src: BaseRelation, _, _) => src - case relation: CatalogRelation if DDLUtils.isHiveTable(relation.tableMeta) => - relation.tableMeta.identifier + case relation: HiveTableRelation => relation.tableMeta.identifier } val tableRelation = df.sparkSession.table(tableIdentWithDB).queryExecution.analyzed @@ -383,8 +382,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { throw new AnalysisException( s"Cannot overwrite table $tableName that is also being read from") // check hive table relation when overwrite mode - case relation: CatalogRelation if DDLUtils.isHiveTable(relation.tableMeta) - && srcRelations.contains(relation.tableMeta.identifier) => + case relation: HiveTableRelation + if srcRelations.contains(relation.tableMeta.identifier) => throw new AnalysisException( s"Cannot overwrite table $tableName that is also being read from") case _ => // OK diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 00a5edfab9446..a775fb8ed4ed3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -35,7 +35,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.catalog.CatalogRelation +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.encoders._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ @@ -2777,7 +2777,7 @@ class Dataset[T] private[sql]( fsBasedRelation.inputFiles case fr: FileRelation => fr.inputFiles - case r: CatalogRelation if DDLUtils.isHiveTable(r.tableMeta) => + case r: HiveTableRelation => r.tableMeta.storage.locationUri.map(_.toString).toArray }.flatten files.toSet.toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala index 3c046ce494285..d59b3c6f0caf2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{HiveTableRelation, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -101,7 +101,7 @@ case class OptimizeMetadataOnlyQuery( val partitionData = fsRelation.location.listFiles(Nil, Nil) LocalRelation(partAttrs, partitionData.map(_.values)) - case relation: CatalogRelation => + case relation: HiveTableRelation => val partAttrs = getPartitionAttrs(relation.tableMeta.partitionColumnNames, relation) val caseInsensitiveProperties = CaseInsensitiveMap(relation.tableMeta.storage.properties) @@ -137,7 +137,7 @@ case class OptimizeMetadataOnlyQuery( val partAttrs = getPartitionAttrs(fsRelation.partitionSchema.map(_.name), l) Some(AttributeSet(partAttrs), l) - case relation: CatalogRelation if relation.tableMeta.partitionColumnNames.nonEmpty => + case relation: 
HiveTableRelation if relation.tableMeta.partitionColumnNames.nonEmpty => val partAttrs = getPartitionAttrs(relation.tableMeta.partitionColumnNames, relation) Some(AttributeSet(partAttrs), relation) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index ded9303de55fe..04ee081a0f9ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, QualifiedTableName} import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogUtils} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation @@ -207,15 +207,16 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast /** - * Replaces [[CatalogRelation]] with data source table if its table provider is not hive. + * Replaces [[UnresolvedCatalogRelation]] with concrete relation logical plans. + * + * TODO: we should remove the special handling for hive tables after completely making hive as a + * data source. */ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] { - private def readDataSourceTable(r: CatalogRelation): LogicalPlan = { - val table = r.tableMeta + private def readDataSourceTable(table: CatalogTable): LogicalPlan = { val qualifiedTableName = QualifiedTableName(table.database, table.identifier.table) - val catalogProxy = sparkSession.sessionState.catalog - - val plan = catalogProxy.getCachedPlan(qualifiedTableName, new Callable[LogicalPlan]() { + val catalog = sparkSession.sessionState.catalog + catalog.getCachedPlan(qualifiedTableName, new Callable[LogicalPlan]() { override def call(): LogicalPlan = { val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_)) val dataSource = @@ -232,24 +233,30 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] LogicalRelation(dataSource.resolveRelation(checkFilesExist = false), table) } - }).asInstanceOf[LogicalRelation] + }) + } - if (r.output.isEmpty) { - // It's possible that the table schema is empty and need to be inferred at runtime. For this - // case, we don't need to change the output of the cached plan. - plan - } else { - plan.copy(output = r.output) - } + private def readHiveTable(table: CatalogTable): LogicalPlan = { + HiveTableRelation( + table, + // Hive table columns are always nullable. 
+ table.dataSchema.asNullable.toAttributes, + table.partitionSchema.asNullable.toAttributes) } override def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case i @ InsertIntoTable(r: CatalogRelation, _, _, _, _) - if DDLUtils.isDatasourceTable(r.tableMeta) => - i.copy(table = readDataSourceTable(r)) + case i @ InsertIntoTable(UnresolvedCatalogRelation(tableMeta), _, _, _, _) + if DDLUtils.isDatasourceTable(tableMeta) => + i.copy(table = readDataSourceTable(tableMeta)) + + case i @ InsertIntoTable(UnresolvedCatalogRelation(tableMeta), _, _, _, _) => + i.copy(table = readHiveTable(tableMeta)) + + case UnresolvedCatalogRelation(tableMeta) if DDLUtils.isDatasourceTable(tableMeta) => + readDataSourceTable(tableMeta) - case r: CatalogRelation if DDLUtils.isDatasourceTable(r.tableMeta) => - readDataSourceTable(r) + case UnresolvedCatalogRelation(tableMeta) => + readHiveTable(tableMeta) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 45f2a41f24937..9647f2c0edccb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -376,7 +376,7 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] wit def apply(plan: LogicalPlan): LogicalPlan = plan transform { case i @ InsertIntoTable(table, _, query, _, _) if table.resolved && query.resolved => table match { - case relation: CatalogRelation => + case relation: HiveTableRelation => val metadata = relation.tableMeta preprocess(i, metadata.identifier.quotedString, metadata.partitionColumnNames) case LogicalRelation(h: HadoopFsRelation, _, catalogTable) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index ae0f219b281ea..86d19af9dd548 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -24,7 +24,7 @@ import scala.collection.mutable import scala.util.Random import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics} +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, HiveTableRelation} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.LogicalRelation @@ -304,7 +304,7 @@ abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils // Analyze only one column. 
sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS c1") val (relation, catalogTable) = spark.table(tableName).queryExecution.analyzed.collect { - case catalogRel: CatalogRelation => (catalogRel, catalogRel.tableMeta) + case catalogRel: HiveTableRelation => (catalogRel, catalogRel.tableMeta) case logicalRel: LogicalRelation => (logicalRel, logicalRel.catalogTable.get) }.head val emptyColStat = ColumnStat(0, None, None, 0, 4, 4) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 9b3cbb63a21b0..e1fee9af420dc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -112,7 +112,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } def convertToLogicalRelation( - relation: CatalogRelation, + relation: HiveTableRelation, options: Map[String, String], fileFormatClass: Class[_ <: FileFormat], fileType: String): LogicalRelation = { @@ -212,7 +212,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log logicalRelation }) } - // The inferred schema may have different filed names as the table schema, we should respect + // The inferred schema may have different field names as the table schema, we should respect // it, but also respect the exprId in table relation output. assert(result.output.length == relation.output.length && result.output.zip(relation.output).forall { case (a1, a2) => a1.dataType == a2.dataType }) @@ -223,7 +223,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } private def inferIfNeeded( - relation: CatalogRelation, + relation: HiveTableRelation, options: Map[String, String], fileFormat: FileFormat, fileIndexOpt: Option[FileIndex] = None): (StructType, CatalogTable) = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 4f090d545cd18..53e500ea78fc4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.common.StatsSetupConst import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics, CatalogStorageFormat, CatalogTable} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, ScriptTransformation} @@ -116,7 +116,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case relation: CatalogRelation + case relation: HiveTableRelation if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => val table = relation.tableMeta // TODO: check if this estimate is valid for tables after partition pruning. 
@@ -160,7 +160,7 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { */ object HiveAnalysis extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case InsertIntoTable(r: CatalogRelation, partSpec, query, overwrite, ifPartitionNotExists) + case InsertIntoTable(r: HiveTableRelation, partSpec, query, overwrite, ifPartitionNotExists) if DDLUtils.isHiveTable(r.tableMeta) => InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, ifPartitionNotExists) @@ -184,13 +184,13 @@ object HiveAnalysis extends Rule[LogicalPlan] { case class RelationConversions( conf: SQLConf, sessionCatalog: HiveSessionCatalog) extends Rule[LogicalPlan] { - private def isConvertible(relation: CatalogRelation): Boolean = { + private def isConvertible(relation: HiveTableRelation): Boolean = { val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) serde.contains("parquet") && conf.getConf(HiveUtils.CONVERT_METASTORE_PARQUET) || serde.contains("orc") && conf.getConf(HiveUtils.CONVERT_METASTORE_ORC) } - private def convert(relation: CatalogRelation): LogicalRelation = { + private def convert(relation: HiveTableRelation): LogicalRelation = { val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) if (serde.contains("parquet")) { val options = Map(ParquetOptions.MERGE_SCHEMA -> @@ -207,14 +207,14 @@ case class RelationConversions( override def apply(plan: LogicalPlan): LogicalPlan = { plan transformUp { // Write path - case InsertIntoTable(r: CatalogRelation, partition, query, overwrite, ifPartitionNotExists) + case InsertIntoTable(r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) // Inserting into partitioned table is not supported in Parquet/Orc data source (yet). if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && !r.isPartitioned && isConvertible(r) => InsertIntoTable(convert(r), partition, query, overwrite, ifPartitionNotExists) // Read path - case relation: CatalogRelation + case relation: HiveTableRelation if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => convert(relation) } @@ -242,7 +242,7 @@ private[hive] trait HiveStrategies { */ object HiveTableScans extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case PhysicalOperation(projectList, predicates, relation: CatalogRelation) => + case PhysicalOperation(projectList, predicates, relation: HiveTableRelation) => // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. 
val partitionKeyIds = AttributeSet(relation.partitionCols) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index e191071efbf18..75b076b07f4bd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -30,7 +30,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.catalog.CatalogRelation +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.execution._ @@ -50,7 +50,7 @@ import org.apache.spark.util.Utils private[hive] case class HiveTableScanExec( requestedAttributes: Seq[Attribute], - relation: CatalogRelation, + relation: HiveTableRelation, partitionPruningPred: Seq[Expression])( @transient private val sparkSession: SparkSession) extends LeafExecNode { @@ -205,7 +205,7 @@ case class HiveTableScanExec( val input: AttributeSeq = relation.output HiveTableScanExec( requestedAttributes.map(QueryPlan.normalizeExprId(_, input)), - relation.canonicalized.asInstanceOf[CatalogRelation], + relation.canonicalized.asInstanceOf[HiveTableRelation], QueryPlan.normalizePredicates(partitionPruningPred, input))(sparkSession) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index b554694815571..06a30b726549e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1377,6 +1377,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv hiveClient.createTable(tableDesc, ignoreIfExists = false) checkAnswer(spark.table("old"), Row(1, "a")) + checkAnswer(sql("select * from old"), Row(1, "a")) val expectedSchema = StructType(Seq( StructField("i", IntegerType, nullable = true), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 819180d8b972e..a9caad897c589 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -23,7 +23,7 @@ import scala.reflect.ClassTag import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogStatistics} +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, HiveTableRelation} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.joins._ @@ -60,7 +60,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto |LOCATION '${tempDir.toURI}'""".stripMargin) val relation = spark.table("csv_table").queryExecution.analyzed.children.head - .asInstanceOf[CatalogRelation] + .asInstanceOf[HiveTableRelation] val properties = relation.tableMeta.properties assert(properties("totalSize").toLong <= 0, 
"external table totalSize must be <= 0") @@ -497,7 +497,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto test("estimates the size of a test Hive serde tables") { val df = sql("""SELECT * FROM src""") val sizes = df.queryExecution.analyzed.collect { - case relation: CatalogRelation => relation.stats(conf).sizeInBytes + case relation: HiveTableRelation => relation.stats(conf).sizeInBytes } assert(sizes.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes(0).equals(BigInt(5812)), @@ -557,7 +557,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto () => (), metastoreQuery, metastoreAnswer, - implicitly[ClassTag[CatalogRelation]] + implicitly[ClassTag[HiveTableRelation]] ) } @@ -571,7 +571,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // Assert src has a size smaller than the threshold. val sizes = df.queryExecution.analyzed.collect { - case relation: CatalogRelation => relation.stats(conf).sizeInBytes + case relation: HiveTableRelation => relation.stats(conf).sizeInBytes } assert(sizes.size === 2 && sizes(1) <= spark.sessionState.conf.autoBroadcastJoinThreshold && sizes(0) <= spark.sessionState.conf.autoBroadcastJoinThreshold, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 002ddd44e3bdb..dc79bfa96725c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.TestUtils import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry, NoSuchPartitionException} -import org.apache.spark.sql.catalyst.catalog.{CatalogRelation, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, CatalogUtils, HiveTableRelation} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} @@ -454,7 +454,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { case LogicalRelation(r: HadoopFsRelation, _, _) => if (!isDataSourceTable) { fail( - s"${classOf[CatalogRelation].getCanonicalName} is expected, but found " + + s"${classOf[HiveTableRelation].getCanonicalName} is expected, but found " + s"${HadoopFsRelation.getClass.getCanonicalName}.") } userSpecifiedLocation match { @@ -464,11 +464,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } assert(catalogTable.provider.get === format) - case r: CatalogRelation => + case r: HiveTableRelation => if (isDataSourceTable) { fail( s"${HadoopFsRelation.getClass.getCanonicalName} is expected, but found " + - s"${classOf[CatalogRelation].getCanonicalName}.") + s"${classOf[HiveTableRelation].getCanonicalName}.") } userSpecifiedLocation match { case Some(location) => @@ -948,7 +948,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { withSQLConf(SQLConf.CONVERT_CTAS.key -> "false") { sql("CREATE TABLE explodeTest (key bigInt)") table("explodeTest").queryExecution.analyzed match { - case SubqueryAlias(_, r: CatalogRelation) => // OK + case SubqueryAlias(_, r: 
HiveTableRelation) => // OK case _ => fail("To correctly test the fix of SPARK-5875, explodeTest should be a MetastoreRelation") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala index 8c855730c31f2..60ccd996d6d58 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala @@ -26,7 +26,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogRelation +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.{LogicalRelation, RecordReaderIterator} import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive._ @@ -475,7 +475,7 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest { } } else { queryExecution.analyzed.collectFirst { - case _: CatalogRelation => () + case _: HiveTableRelation => () }.getOrElse { fail(s"Expecting no conversion from orc to data sources, " + s"but got:\n$queryExecution") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index 23f21e6b9931e..303884da19f09 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -21,7 +21,7 @@ import java.io.File import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogRelation +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.DataSourceScanExec import org.apache.spark.sql.execution.datasources._ @@ -812,7 +812,7 @@ class ParquetSourceSuite extends ParquetPartitioningTest { } } else { queryExecution.analyzed.collectFirst { - case _: CatalogRelation => + case _: HiveTableRelation => }.getOrElse { fail(s"Expecting no conversion from parquet to data sources, " + s"but got:\n$queryExecution") From fdea642dbd17d74c8bf136c1746159acaa937d25 Mon Sep 17 00:00:00 2001 From: donnyzone Date: Thu, 17 Aug 2017 22:37:32 -0700 Subject: [PATCH 1149/1204] [SPARK-21739][SQL] Cast expression should initialize timezoneId when it is called statically to convert something into TimestampType ## What changes were proposed in this pull request? https://issues.apache.org/jira/projects/SPARK/issues/SPARK-21739 This issue is caused by introducing TimeZoneAwareExpression. When the **Cast** expression converts something into TimestampType, it should be resolved with setting `timezoneId`. In general, it is resolved in LogicalPlan phase. However, there are still some places that use Cast expression statically to convert datatypes without setting `timezoneId`. In such cases, `NoSuchElementException: None.get` will be thrown for TimestampType. This PR is proposed to fix the issue. We have checked the whole project and found two such usages(i.e., in`TableReader` and `HiveTableScanExec`). ## How was this patch tested? unit test Author: donnyzone Closes #18960 from DonnyZone/spark-21739. 
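For illustration, a minimal sketch of the pattern the fix applies (this is not code from the patch, and `castPartitionValue` is an invented helper name): when a `Cast` to `TimestampType` is built statically, outside the analyzer, the session-local time zone has to be supplied explicitly, which is what mixing in `CastSupport` and calling `cast(...)` provides.

```
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.TimestampType

// Hypothetical helper: construct the Cast with an explicit timeZoneId so that
// evaluating it eagerly does not fail while resolving the time zone.
def castPartitionValue(raw: String, conf: SQLConf): Any = {
  Cast(Literal(raw), TimestampType, Option(conf.sessionLocalTimeZone)).eval(null)
}
```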
(cherry picked from commit 310454be3b0ce5ff6b6ef0070c5daadf6fb16927) Signed-off-by: gatorsmile --- .../org/apache/spark/sql/hive/TableReader.scala | 8 ++++++-- .../sql/hive/execution/HiveTableScanExec.scala | 8 ++++++-- .../spark/sql/hive/QueryPartitionSuite.scala | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 16c1103dd1ea3..a0e379f375820 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -39,8 +39,10 @@ import org.apache.spark.internal.Logging import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.CastSupport import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -65,7 +67,7 @@ class HadoopTableReader( @transient private val tableDesc: TableDesc, @transient private val sparkSession: SparkSession, hadoopConf: Configuration) - extends TableReader with Logging { + extends TableReader with CastSupport with Logging { // Hadoop honors "mapreduce.job.maps" as hint, // but will ignore when mapreduce.jobtracker.address is "local". @@ -86,6 +88,8 @@ class HadoopTableReader( private val _broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + override def conf: SQLConf = sparkSession.sessionState.conf + override def makeRDDForTable(hiveTable: HiveTable): RDD[InternalRow] = makeRDDForTable( hiveTable, @@ -227,7 +231,7 @@ class HadoopTableReader( def fillPartitionKeys(rawPartValues: Array[String], row: InternalRow): Unit = { partitionKeyAttrs.foreach { case (attr, ordinal) => val partOrdinal = partitionKeys.indexOf(attr) - row(ordinal) = Cast(Literal(rawPartValues(partOrdinal)), attr.dataType).eval(null) + row(ordinal) = cast(Literal(rawPartValues(partOrdinal)), attr.dataType).eval(null) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index 75b076b07f4bd..2ce8ccfb35e0c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.CastSupport import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan @@ -37,6 +38,7 @@ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.client.HiveClientImpl +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType} import org.apache.spark.util.Utils @@ -53,11 +55,13 @@ case class HiveTableScanExec( relation: 
HiveTableRelation, partitionPruningPred: Seq[Expression])( @transient private val sparkSession: SparkSession) - extends LeafExecNode { + extends LeafExecNode with CastSupport { require(partitionPruningPred.isEmpty || relation.isPartitioned, "Partition pruning predicates only supported for partitioned tables.") + override def conf: SQLConf = sparkSession.sessionState.conf + override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) @@ -104,7 +108,7 @@ case class HiveTableScanExec( hadoopConf) private def castFromString(value: String, dataType: DataType) = { - Cast(Literal(value), dataType).eval(null) + cast(Literal(value), dataType).eval(null) } private def addColumnMetadataToConf(hiveConf: Configuration): Unit = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 43b6bf5feeb60..b2dc401ce1efc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive import java.io.File +import java.sql.Timestamp import com.google.common.io.Files import org.apache.hadoop.fs.FileSystem @@ -68,4 +69,20 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingl sql("DROP TABLE IF EXISTS createAndInsertTest") } } + + test("SPARK-21739: Cast expression should initialize timezoneId") { + withTable("table_with_timestamp_partition") { + sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)") + sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " + + "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)") + + // test for Cast expression in TableReader + checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"), + Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000")))) + + // test for Cast expression in HiveTableScanExec + checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " + + "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1)) + } + } } From 6c2a38a381f22029abd9ca4beab49b2473a13670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Pelvet?= Date: Sun, 20 Aug 2017 11:05:54 +0100 Subject: [PATCH 1150/1204] [MINOR] Correct validateAndTransformSchema in GaussianMixture and AFTSurvivalRegression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? The line SchemaUtils.appendColumn(schema, $(predictionCol), IntegerType) did not modify the variable schema, hence only the last line had any effect. A temporary variable is used to correctly append the two columns predictionCol and probabilityCol. ## How was this patch tested? Manually. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Cédric Pelvet Closes #18980 from sharp-pixel/master. 
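For context, a small self-contained illustration of why the temporary variable matters (simplified to plain `StructType` columns rather than the ML vector types): `StructType` is immutable, so an append returns a new schema whose result has to be chained, otherwise only the last append survives.

```
import org.apache.spark.sql.types.{DoubleType, IntegerType, StructType}

val base = new StructType().add("features", DoubleType)

// Discarding the returned schema, as the old code effectively did, changes nothing:
base.add("prediction", IntegerType)
assert(base.fieldNames.toSeq == Seq("features"))

// Chaining through a temporary value keeps both appended columns:
val withPrediction = base.add("prediction", IntegerType)
val withBoth = withPrediction.add("probability", DoubleType)
assert(withBoth.fieldNames.toSeq == Seq("features", "prediction", "probability"))
```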
(cherry picked from commit 73e04ecc4f29a0fe51687ed1337c61840c976f89) Signed-off-by: Sean Owen --- .../org/apache/spark/ml/clustering/GaussianMixture.scala | 4 ++-- .../spark/ml/regression/AFTSurvivalRegression.scala | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 5259ee419445f..f19ad7a5a6938 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -64,8 +64,8 @@ private[clustering] trait GaussianMixtureParams extends Params with HasMaxIter w */ protected def validateAndTransformSchema(schema: StructType): StructType = { SchemaUtils.checkColumnType(schema, $(featuresCol), new VectorUDT) - SchemaUtils.appendColumn(schema, $(predictionCol), IntegerType) - SchemaUtils.appendColumn(schema, $(probabilityCol), new VectorUDT) + val schemaWithPredictionCol = SchemaUtils.appendColumn(schema, $(predictionCol), IntegerType) + SchemaUtils.appendColumn(schemaWithPredictionCol, $(probabilityCol), new VectorUDT) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 0891994530f88..16821f317760e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -109,10 +109,12 @@ private[regression] trait AFTSurvivalRegressionParams extends Params SchemaUtils.checkNumericType(schema, $(censorCol)) SchemaUtils.checkNumericType(schema, $(labelCol)) } - if (hasQuantilesCol) { + + val schemaWithQuantilesCol = if (hasQuantilesCol) { SchemaUtils.appendColumn(schema, $(quantilesCol), new VectorUDT) - } - SchemaUtils.appendColumn(schema, $(predictionCol), DoubleType) + } else schema + + SchemaUtils.appendColumn(schemaWithQuantilesCol, $(predictionCol), DoubleType) } } From 0f640e96c9d0b0d95ac4bbcc84eaccefe7352f0f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 21 Aug 2017 00:45:23 +0800 Subject: [PATCH 1151/1204] [SPARK-21721][SQL][FOLLOWUP] Clear FileSystem deleteOnExit cache when paths are successfully removed ## What changes were proposed in this pull request? Fix a typo in test. ## How was this patch tested? Jenkins tests. Author: Liang-Chi Hsieh Closes #19005 from viirya/SPARK-21721-followup. 
(cherry picked from commit 28a6cca7df900d13613b318c07acb97a5722d2b8) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index dc79bfa96725c..31b36f1574d16 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2017,7 +2017,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("SPARK-21721: Clear FileSystem deleterOnExit cache if path is successfully removed") { - withTable("test21721") { + val table = "test21721" + withTable(table) { val deleteOnExitField = classOf[FileSystem].getDeclaredField("deleteOnExit") deleteOnExitField.setAccessible(true) @@ -2025,10 +2026,10 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { val setOfPath = deleteOnExitField.get(fs).asInstanceOf[Set[Path]] val testData = sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString)).toDF() - sql("CREATE TABLE test21721 (key INT, value STRING)") + sql(s"CREATE TABLE $table (key INT, value STRING)") val pathSizeToDeleteOnExit = setOfPath.size() - (0 to 10).foreach(_ => testData.write.mode(SaveMode.Append).insertInto("test1")) + (0 to 10).foreach(_ => testData.write.mode(SaveMode.Append).insertInto(table)) assert(setOfPath.size() == pathSizeToDeleteOnExit) } From 526087f9ebca90f77f78d699c5f8d0243dd8ab61 Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Mon, 21 Aug 2017 15:09:02 -0700 Subject: [PATCH 1152/1204] [SPARK-21617][SQL] Store correct table metadata when altering schema in Hive metastore. For Hive tables, the current "replace the schema" code is the correct path, except that an exception in that path should result in an error, and not in retrying in a different way. For data source tables, Spark may generate a non-compatible Hive table; but for that to work with Hive 2.1, the detection of data source tables needs to be fixed in the Hive client, to also consider the raw tables used by code such as `alterTableSchema`. Tested with existing and added unit tests (plus internal tests with a 2.1 metastore). Author: Marcelo Vanzin Closes #18849 from vanzin/SPARK-21617. 
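To make the data source detection concrete, a standalone sketch of the rule this change introduces (illustrative only; the real constant lives in `HiveExternalCatalog`, and the property key is spelled out here just for the example): a raw metastore table counts as a data source table if either its provider field or its `spark.sql.sources.provider` table property names a provider other than `hive`.

```
// Illustrative only: mirrors the isDatasourceTable check on plain values.
def looksLikeDatasourceTable(
    provider: Option[String],
    properties: Map[String, String]): Boolean = {
  val effective = provider.orElse(properties.get("spark.sql.sources.provider"))
  effective.isDefined && effective != Some("hive")
}
```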
(cherry picked from commit 84b5b16ea6c9816c70f7471a50eb5e4acb7fb1a1) Signed-off-by: gatorsmile --- .../sql/execution/command/DDLSuite.scala | 15 +-- .../spark/sql/hive/HiveExternalCatalog.scala | 55 +++++--- .../sql/hive/client/HiveClientImpl.scala | 3 +- .../hive/execution/Hive_2_1_DDLSuite.scala | 126 ++++++++++++++++++ 4 files changed, 171 insertions(+), 28 deletions(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index e4dd077715d0f..56d2937ccc6ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2281,18 +2281,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { }.getMessage assert(e.contains("Found duplicate column(s)")) } else { - if (isUsingHiveMetastore) { - // hive catalog will still complains that c1 is duplicate column name because hive - // identifiers are case insensitive. - val e = intercept[AnalysisException] { - sql("ALTER TABLE t1 ADD COLUMNS (C1 string)") - }.getMessage - assert(e.contains("HiveException")) - } else { - sql("ALTER TABLE t1 ADD COLUMNS (C1 string)") - assert(spark.table("t1").schema - .equals(new StructType().add("c1", IntegerType).add("C1", StringType))) - } + sql("ALTER TABLE t1 ADD COLUMNS (C1 string)") + assert(spark.table("t1").schema == + new StructType().add("c1", IntegerType).add("C1", StringType)) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 9fea0c6249700..2ea4e157a94b3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -114,7 +114,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat * should interpret these special data source properties and restore the original table metadata * before returning it. */ - private def getRawTable(db: String, table: String): CatalogTable = withClient { + private[hive] def getRawTable(db: String, table: String): CatalogTable = withClient { client.getTable(db, table) } @@ -386,6 +386,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat * can be used as table properties later. */ private def tableMetaToTableProps(table: CatalogTable): mutable.Map[String, String] = { + tableMetaToTableProps(table, table.schema) + } + + private def tableMetaToTableProps( + table: CatalogTable, + schema: StructType): mutable.Map[String, String] = { val partitionColumns = table.partitionColumnNames val bucketSpec = table.bucketSpec @@ -394,7 +400,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // property. In this case, we split the JSON string and store each part as a separate table // property. val threshold = conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) - val schemaJsonString = table.schema.json + val schemaJsonString = schema.json // Split the JSON string. 
val parts = schemaJsonString.grouped(threshold).toSeq properties.put(DATASOURCE_SCHEMA_NUMPARTS, parts.size.toString) @@ -627,20 +633,29 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat override def alterTableSchema(db: String, table: String, schema: StructType): Unit = withClient { requireTableExists(db, table) val rawTable = getRawTable(db, table) - val withNewSchema = rawTable.copy(schema = schema) - verifyColumnNames(withNewSchema) // Add table metadata such as table schema, partition columns, etc. to table properties. - val updatedTable = withNewSchema.copy( - properties = withNewSchema.properties ++ tableMetaToTableProps(withNewSchema)) - try { - client.alterTable(updatedTable) - } catch { - case NonFatal(e) => - val warningMessage = - s"Could not alter schema of table ${rawTable.identifier.quotedString} in a Hive " + - "compatible way. Updating Hive metastore in Spark SQL specific format." - logWarning(warningMessage, e) - client.alterTable(updatedTable.copy(schema = updatedTable.partitionSchema)) + val updatedProperties = rawTable.properties ++ tableMetaToTableProps(rawTable, schema) + val withNewSchema = rawTable.copy(properties = updatedProperties, schema = schema) + verifyColumnNames(withNewSchema) + + if (isDatasourceTable(rawTable)) { + // For data source tables, first try to write it with the schema set; if that does not work, + // try again with updated properties and the partition schema. This is a simplified version of + // what createDataSourceTable() does, and may leave the table in a state unreadable by Hive + // (for example, the schema does not match the data source schema, or does not match the + // storage descriptor). + try { + client.alterTable(withNewSchema) + } catch { + case NonFatal(e) => + val warningMessage = + s"Could not alter schema of table ${rawTable.identifier.quotedString} in a Hive " + + "compatible way. Updating Hive metastore in Spark SQL specific format." + logWarning(warningMessage, e) + client.alterTable(withNewSchema.copy(schema = rawTable.partitionSchema)) + } + } else { + client.alterTable(withNewSchema) } } @@ -1246,4 +1261,14 @@ object HiveExternalCatalog { getColumnNamesByType(metadata.properties, "sort", "sorting columns")) } } + + /** + * Detects a data source table. This checks both the table provider and the table properties, + * unlike DDLUtils which just checks the former. 
+ */ + private[spark] def isDatasourceTable(table: CatalogTable): Boolean = { + val provider = table.provider.orElse(table.properties.get(DATASOURCE_PROVIDER)) + provider.isDefined && provider != Some(DDLUtils.HIVE_PROVIDER) + } + } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 74e15a5777916..2cf11f41a10da 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -47,6 +47,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.client.HiveClientImpl._ import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} @@ -838,7 +839,7 @@ private[hive] object HiveClientImpl { } // after SPARK-19279, it is not allowed to create a hive table with an empty schema, // so here we should not add a default col schema - if (schema.isEmpty && DDLUtils.isDatasourceTable(table)) { + if (schema.isEmpty && HiveExternalCatalog.isDatasourceTable(table)) { // This is a hack to preserve existing behavior. Before Spark 2.0, we do not // set a default serde here (this was done in Hive), and so if the user provides // an empty schema Hive would automatically populate the schema with a single diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala new file mode 100644 index 0000000000000..5c248b9acd04f --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/Hive_2_1_DDLSuite.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution + +import scala.language.existentials + +import org.apache.hadoop.conf.Configuration +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.catalog._ +import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.StaticSQLConf._ +import org.apache.spark.sql.types._ +import org.apache.spark.tags.ExtendedHiveTest +import org.apache.spark.util.Utils + +/** + * A separate set of DDL tests that uses Hive 2.1 libraries, which behave a little differently + * from the built-in ones. + */ +@ExtendedHiveTest +class Hive_2_1_DDLSuite extends SparkFunSuite with TestHiveSingleton with BeforeAndAfterEach + with BeforeAndAfterAll { + + // Create a custom HiveExternalCatalog instance with the desired configuration. We cannot + // use SparkSession here since there's already an active on managed by the TestHive object. + private var catalog = { + val warehouse = Utils.createTempDir() + val metastore = Utils.createTempDir() + metastore.delete() + val sparkConf = new SparkConf() + .set(SparkLauncher.SPARK_MASTER, "local") + .set(WAREHOUSE_PATH.key, warehouse.toURI().toString()) + .set(CATALOG_IMPLEMENTATION.key, "hive") + .set(HiveUtils.HIVE_METASTORE_VERSION.key, "2.1") + .set(HiveUtils.HIVE_METASTORE_JARS.key, "maven") + + val hadoopConf = new Configuration() + hadoopConf.set("hive.metastore.warehouse.dir", warehouse.toURI().toString()) + hadoopConf.set("javax.jdo.option.ConnectionURL", + s"jdbc:derby:;databaseName=${metastore.getAbsolutePath()};create=true") + // These options are needed since the defaults in Hive 2.1 cause exceptions with an + // empty metastore db. 
+ hadoopConf.set("datanucleus.schema.autoCreateAll", "true") + hadoopConf.set("hive.metastore.schema.verification", "false") + + new HiveExternalCatalog(sparkConf, hadoopConf) + } + + override def afterEach: Unit = { + catalog.listTables("default").foreach { t => + catalog.dropTable("default", t, true, false) + } + spark.sessionState.catalog.reset() + } + + override def afterAll(): Unit = { + catalog = null + } + + test("SPARK-21617: ALTER TABLE for non-compatible DataSource tables") { + testAlterTable( + "t1", + "CREATE TABLE t1 (c1 int) USING json", + StructType(Array(StructField("c1", IntegerType), StructField("c2", IntegerType))), + hiveCompatible = false) + } + + test("SPARK-21617: ALTER TABLE for Hive-compatible DataSource tables") { + testAlterTable( + "t1", + "CREATE TABLE t1 (c1 int) USING parquet", + StructType(Array(StructField("c1", IntegerType), StructField("c2", IntegerType)))) + } + + test("SPARK-21617: ALTER TABLE for Hive tables") { + testAlterTable( + "t1", + "CREATE TABLE t1 (c1 int) STORED AS parquet", + StructType(Array(StructField("c1", IntegerType), StructField("c2", IntegerType)))) + } + + test("SPARK-21617: ALTER TABLE with incompatible schema on Hive-compatible table") { + val exception = intercept[AnalysisException] { + testAlterTable( + "t1", + "CREATE TABLE t1 (c1 string) USING parquet", + StructType(Array(StructField("c2", IntegerType)))) + } + assert(exception.getMessage().contains("types incompatible with the existing columns")) + } + + private def testAlterTable( + tableName: String, + createTableStmt: String, + updatedSchema: StructType, + hiveCompatible: Boolean = true): Unit = { + spark.sql(createTableStmt) + val oldTable = spark.sessionState.catalog.externalCatalog.getTable("default", tableName) + catalog.createTable(oldTable, true) + catalog.alterTableSchema("default", tableName, updatedSchema) + + val updatedTable = catalog.getTable("default", tableName) + assert(updatedTable.schema.fieldNames === updatedSchema.fieldNames) + } + +} From 236b2f4d5a0879fc200f67b0af3a3c4f881ee98f Mon Sep 17 00:00:00 2001 From: Felix Cheung Date: Wed, 23 Aug 2017 21:35:17 -0700 Subject: [PATCH 1153/1204] [SPARK-21805][SPARKR] Disable R vignettes code on Windows ## What changes were proposed in this pull request? Code in vignettes requires winutils on windows to run, when publishing to CRAN or building from source, winutils might not be available, so it's better to disable code run (so resulting vigenttes will not have output from code, but text is still there and code is still there) fix * checking re-building of vignette outputs ... WARNING and > %LOCALAPPDATA% not found. Please define the environment variable or restart and enter an installation path in localDir. ## How was this patch tested? jenkins, appveyor, r-hub before: https://artifacts.r-hub.io/SparkR_2.2.0.tar.gz-49cecef3bb09db1db130db31604e0293/SparkR.Rcheck/00check.log after: https://artifacts.r-hub.io/SparkR_2.2.0.tar.gz-86a066c7576f46794930ad114e5cff7c/SparkR.Rcheck/00check.log Author: Felix Cheung Closes #19016 from felixcheung/rvigwind. 
(cherry picked from commit 43cbfad9992624d89bbb3209d1f5b765c7947bb9) Signed-off-by: Felix Cheung --- R/pkg/DESCRIPTION | 2 +- R/pkg/R/install.R | 6 +++++- R/pkg/vignettes/sparkr-vignettes.Rmd | 11 +++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index cfa49b94c9526..4ac45fc98d5e9 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -2,7 +2,7 @@ Package: SparkR Type: Package Version: 2.2.1 Title: R Frontend for Apache Spark -Description: The SparkR package provides an R Frontend for Apache Spark. +Description: Provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "shivaram@cs.berkeley.edu"), person("Xiangrui", "Meng", role = "aut", diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index ec931befa2854..492dee68e164d 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -270,7 +270,11 @@ sparkCachePath <- function() { if (is_windows()) { winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { - stop(paste("%LOCALAPPDATA% not found.", + message("%LOCALAPPDATA% not found. Falling back to %USERPROFILE%.") + winAppPath <- Sys.getenv("USERPROFILE", unset = NA) + } + if (is.na(winAppPath)) { + stop(paste("%LOCALAPPDATA% and %USERPROFILE% not found.", "Please define the environment variable", "or restart and enter an installation path in localDir.")) } else { diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 031c3bc41f7cd..c97ba5f9a1351 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -27,6 +27,17 @@ vignette: > limitations under the License. --> +```{r setup, include=FALSE} +library(knitr) +opts_hooks$set(eval = function(options) { + # override eval to FALSE only on windows + if (.Platform$OS.type == "windows") { + options$eval = FALSE + } + options +}) +``` + ## Overview SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](http://spark.apache.org/mllib/). From a58536741f8365bb3fff01b588f3b42b219d11e5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 24 Aug 2017 16:44:12 +0200 Subject: [PATCH 1154/1204] [SPARK-21826][SQL] outer broadcast hash join should not throw NPE This is a bug introduced by https://github.com/apache/spark/pull/11274/files#diff-7adb688cbfa583b5711801f196a074bbL274 . Non-equal join condition should only be applied when the equal-join condition matches. regression test Author: Wenchen Fan Closes #19036 from cloud-fan/bug. 
(cherry picked from commit 2dd37d827f2e443dcb3eaf8a95437d179130d55c) Signed-off-by: Herman van Hovell --- .../joins/BroadcastHashJoinExec.scala | 2 +- .../org/apache/spark/sql/JoinSuite.scala | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index 0bc261d593df4..69715ab1f675f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -257,8 +257,8 @@ case class BroadcastHashJoinExec( s""" |boolean $conditionPassed = true; |${eval.trim} - |${ev.code} |if ($matched != null) { + | ${ev.code} | $conditionPassed = !${ev.isNull} && ${ev.value}; |} """.stripMargin diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 40bc1e9f13f81..95dc1478896f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.language.existentials @@ -25,6 +26,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.StructType import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled} class JoinSuite extends QueryTest with SharedSQLContext { @@ -739,4 +741,22 @@ class JoinSuite extends QueryTest with SharedSQLContext { } } } + + test("outer broadcast hash join should not throw NPE") { + withTempView("v1", "v2") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + Seq(2 -> 2).toDF("x", "y").createTempView("v1") + + spark.createDataFrame( + Seq(Row(1, "a")).asJava, + new StructType().add("i", "int", nullable = false).add("j", "string", nullable = false) + ).createTempView("v2") + + checkAnswer( + sql("select x, y, i, j from v1 left join v2 on x = i and y < length(j)"), + Row(2, 2, null, null) + ) + } + } + } } From 2b4bd7910fecc8b7b41c7d4388d2a8204c1901e8 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Thu, 24 Aug 2017 10:18:56 -0700 Subject: [PATCH 1155/1204] [SPARK-21681][ML] fix bug of MLOR do not work correctly when featureStd contains zero (backport PR for 2.2) ## What changes were proposed in this pull request? This is backport PR of https://github.com/apache/spark/pull/18896 fix bug of MLOR do not work correctly when featureStd contains zero We can reproduce the bug through such dataset (features including zero variance), will generate wrong result (all coefficients becomes 0) ``` val multinomialDatasetWithZeroVar = { val nPoints = 100 val coefficients = Array( -0.57997, 0.912083, -0.371077, -0.16624, -0.84355, -0.048509) val xMean = Array(5.843, 3.0) val xVariance = Array(0.6856, 0.0) // including zero variance val testData = generateMultinomialLogisticInput( coefficients, xMean, xVariance, addIntercept = true, nPoints, seed) val df = sc.parallelize(testData, 4).toDF().withColumn("weight", lit(1.0)) df.cache() df } ``` ## How was this patch tested? testcase added. Author: WeichenXu Closes #19026 from WeichenXu123/fix_mlor_zero_var_bug_2_2. 
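The core of the fix is easier to read outside the diff noise: the margin accumulation simply skips features whose standard deviation is zero, so constant columns no longer divide by zero and poison every coefficient. The sketch below is extracted and simplified from the `LogisticAggregator` change in the diff that follows; the wrapper name `accumulateMargins` is illustrative, not the real method.

```
// Simplified extract of the guarded margin update introduced by this patch
// (the real code lives inside the private LogisticAggregator class shown
// in the diff below). A feature with zero std (a constant column) now
// contributes nothing instead of producing Inf/NaN via division by zero.
def accumulateMargins(
    margins: Array[Double],
    localCoefficients: Array[Double],
    localFeaturesStd: Array[Double],
    numClasses: Int)(index: Int, value: Double): Unit = {
  if (localFeaturesStd(index) != 0.0 && value != 0.0) {
    val stdValue = value / localFeaturesStd(index)
    var j = 0
    while (j < numClasses) {
      margins(j) += localCoefficients(index * numClasses + j) * stdValue
      j += 1
    }
  }
}
```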
--- .../classification/LogisticRegression.scala | 12 +-- .../LogisticRegressionSuite.scala | 75 +++++++++++++++++++ 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 567af0488e1b4..1de237309aeae 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1727,11 +1727,13 @@ private class LogisticAggregator( val margins = new Array[Double](numClasses) features.foreachActive { (index, value) => - val stdValue = value / localFeaturesStd(index) - var j = 0 - while (j < numClasses) { - margins(j) += localCoefficients(index * numClasses + j) * stdValue - j += 1 + if (localFeaturesStd(index) != 0.0 && value != 0.0) { + val stdValue = value / localFeaturesStd(index) + var j = 0 + while (j < numClasses) { + margins(j) += localCoefficients(index * numClasses + j) * stdValue + j += 1 + } } } var i = 0 diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 1ffd8dcd53d61..8461d646099a8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -45,6 +45,7 @@ class LogisticRegressionSuite @transient var smallMultinomialDataset: Dataset[_] = _ @transient var binaryDataset: Dataset[_] = _ @transient var multinomialDataset: Dataset[_] = _ + @transient var multinomialDatasetWithZeroVar: Dataset[_] = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { @@ -98,6 +99,23 @@ class LogisticRegressionSuite df.cache() df } + + multinomialDatasetWithZeroVar = { + val nPoints = 100 + val coefficients = Array( + -0.57997, 0.912083, -0.371077, + -0.16624, -0.84355, -0.048509) + + val xMean = Array(5.843, 3.0) + val xVariance = Array(0.6856, 0.0) + + val testData = generateMultinomialLogisticInput( + coefficients, xMean, xVariance, addIntercept = true, nPoints, seed) + + val df = sc.parallelize(testData, 4).toDF().withColumn("weight", lit(1.0)) + df.cache() + df + } } /** @@ -111,6 +129,11 @@ class LogisticRegressionSuite multinomialDataset.rdd.map { case Row(label: Double, features: Vector, weight: Double) => label + "," + weight + "," + features.toArray.mkString(",") }.repartition(1).saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDataset") + multinomialDatasetWithZeroVar.rdd.map { + case Row(label: Double, features: Vector, weight: Double) => + label + "," + weight + "," + features.toArray.mkString(",") + }.repartition(1) + .saveAsTextFile("target/tmp/LogisticRegressionSuite/multinomialDatasetWithZeroVar") } test("params") { @@ -1391,6 +1414,58 @@ class LogisticRegressionSuite assert(model2.interceptVector.toArray.sum ~== 0.0 absTol eps) } + test("multinomial logistic regression with zero variance (SPARK-21681)") { + val sqlContext = multinomialDatasetWithZeroVar.sqlContext + import sqlContext.implicits._ + val mlr = new LogisticRegression().setFamily("multinomial").setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(0.0).setStandardization(true).setWeightCol("weight") + + val model = mlr.fit(multinomialDatasetWithZeroVar) + + /* + Use the following R code to load the data and train the model using glmnet 
package. + library("glmnet") + data <- read.csv("path", header=FALSE) + label = as.factor(data$V1) + w = data$V2 + features = as.matrix(data.frame(data$V3, data$V4)) + coefficients = coef(glmnet(features, label, weights=w, family="multinomial", + alpha = 0, lambda = 0)) + coefficients + $`0` + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.2658824 + data.V3 0.1881871 + data.V4 . + $`1` + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + 0.53604701 + data.V3 -0.02412645 + data.V4 . + $`2` + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + -0.8019294 + data.V3 -0.1640607 + data.V4 . + */ + + val coefficientsR = new DenseMatrix(3, 2, Array( + 0.1881871, 0.0, + -0.02412645, 0.0, + -0.1640607, 0.0), isTransposed = true) + val interceptsR = Vectors.dense(0.2658824, 0.53604701, -0.8019294) + + model.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 absTol eps)) + + assert(model.coefficientMatrix ~== coefficientsR relTol 0.05) + assert(model.coefficientMatrix.toArray.sum ~== 0.0 absTol eps) + assert(model.interceptVector ~== interceptsR relTol 0.05) + assert(model.interceptVector.toArray.sum ~== 0.0 absTol eps) + } + test("multinomial logistic regression with intercept without regularization with bound") { // Bound constrained optimization with bound on one side. val lowerBoundsOnCoefficients = Matrices.dense(3, 4, Array.fill(12)(1.0)) From 0d4ef2f690e378cade0a3ec84d535a535dc20dfc Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 28 Aug 2017 07:41:42 +0100 Subject: [PATCH 1156/1204] [SPARK-21818][ML][MLLIB] Fix bug of MultivariateOnlineSummarizer.variance generate negative result Because of numerical error, MultivariateOnlineSummarizer.variance is possible to generate negative variance. **This is a serious bug because many algos in MLLib** **use stddev computed from** `sqrt(variance)` **it will generate NaN and crash the whole algorithm.** we can reproduce this bug use the following code: ``` val summarizer1 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.7) val summarizer2 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.4) val summarizer3 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.5) val summarizer4 = (new MultivariateOnlineSummarizer) .add(Vectors.dense(3.0), 0.4) val summarizer = summarizer1 .merge(summarizer2) .merge(summarizer3) .merge(summarizer4) println(summarizer.variance(0)) ``` This PR fix the bugs in `mllib.stat.MultivariateOnlineSummarizer.variance` and `ml.stat.SummarizerBuffer.variance`, and several places in `WeightedLeastSquares` test cases added. Author: WeichenXu Closes #19029 from WeichenXu123/fix_summarizer_var_bug. (cherry picked from commit 0456b4050817e64f27824720e695bbfff738d474) Signed-off-by: Sean Owen --- .../spark/ml/optim/WeightedLeastSquares.scala | 12 +++++++++--- .../stat/MultivariateOnlineSummarizer.scala | 5 +++-- .../MultivariateOnlineSummarizerSuite.scala | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala index 56ab9675700a0..c5c9c8eb2bd29 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala @@ -440,7 +440,11 @@ private[ml] object WeightedLeastSquares { /** * Weighted population standard deviation of labels. 
*/ - def bStd: Double = math.sqrt(bbSum / wSum - bBar * bBar) + def bStd: Double = { + // We prevent variance from negative value caused by numerical error. + val variance = math.max(bbSum / wSum - bBar * bBar, 0.0) + math.sqrt(variance) + } /** * Weighted mean of (label * features). @@ -471,7 +475,8 @@ private[ml] object WeightedLeastSquares { while (i < triK) { val l = j - 2 val aw = aSum(l) / wSum - std(l) = math.sqrt(aaValues(i) / wSum - aw * aw) + // We prevent variance from negative value caused by numerical error. + std(l) = math.sqrt(math.max(aaValues(i) / wSum - aw * aw, 0.0)) i += j j += 1 } @@ -489,7 +494,8 @@ private[ml] object WeightedLeastSquares { while (i < triK) { val l = j - 2 val aw = aSum(l) / wSum - variance(l) = aaValues(i) / wSum - aw * aw + // We prevent variance from negative value caused by numerical error. + variance(l) = math.max(aaValues(i) / wSum - aw * aw, 0.0) i += j j += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 7dc0c459ec032..8121880cfb233 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -213,8 +213,9 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S var i = 0 val len = currM2n.length while (i < len) { - realVariance(i) = (currM2n(i) + deltaMean(i) * deltaMean(i) * weightSum(i) * - (totalWeightSum - weightSum(i)) / totalWeightSum) / denominator + // We prevent variance from negative value caused by numerical error. + realVariance(i) = math.max((currM2n(i) + deltaMean(i) * deltaMean(i) * weightSum(i) * + (totalWeightSum - weightSum(i)) / totalWeightSum) / denominator, 0.0) i += 1 } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala index 797e84fcc7377..c6466bc918dd0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizerSuite.scala @@ -270,4 +270,22 @@ class MultivariateOnlineSummarizerSuite extends SparkFunSuite { assert(summarizer3.max ~== Vectors.dense(10.0, 0.0) absTol 1e-14) assert(summarizer3.min ~== Vectors.dense(0.0, -10.0) absTol 1e-14) } + + test ("test zero variance (SPARK-21818)") { + val summarizer1 = (new MultivariateOnlineSummarizer) + .add(Vectors.dense(3.0), 0.7) + val summarizer2 = (new MultivariateOnlineSummarizer) + .add(Vectors.dense(3.0), 0.4) + val summarizer3 = (new MultivariateOnlineSummarizer) + .add(Vectors.dense(3.0), 0.5) + val summarizer4 = (new MultivariateOnlineSummarizer) + .add(Vectors.dense(3.0), 0.4) + + val summarizer = summarizer1 + .merge(summarizer2) + .merge(summarizer3) + .merge(summarizer4) + + assert(summarizer.variance(0) >= 0.0) + } } From 59bb7ebfb83c292cea853d6cd6fdf9748baa6ce2 Mon Sep 17 00:00:00 2001 From: pgandhi Date: Mon, 28 Aug 2017 08:51:22 -0500 Subject: [PATCH 1157/1204] [SPARK-21798] No config to replace deprecated SPARK_CLASSPATH config for launching daemons like History Server History Server Launch uses SparkClassCommandBuilder for launching the server. It is observed that SPARK_CLASSPATH has been removed and deprecated. 
For spark-submit this takes a different route and spark.driver.extraClasspath takes care of specifying additional jars in the classpath that were previously specified in the SPARK_CLASSPATH. Right now the only way specify the additional jars for launching daemons such as history server is using SPARK_DIST_CLASSPATH (https://spark.apache.org/docs/latest/hadoop-provided.html) but this I presume is a distribution classpath. It would be nice to have a similar config like spark.driver.extraClasspath for launching daemons similar to history server. Added new environment variable SPARK_DAEMON_CLASSPATH to set classpath for launching daemons. Tested and verified for History Server and Standalone Mode. ## How was this patch tested? Initially, history server start script would fail for the reason being that it could not find the required jars for launching the server in the java classpath. Same was true for running Master and Worker in standalone mode. By adding the environment variable SPARK_DAEMON_CLASSPATH to the java classpath, both the daemons(History Server, Standalone daemons) are starting up and running. Author: pgandhi Author: pgandhi999 Closes #19047 from pgandhi999/master. (cherry picked from commit 24e6c187fbaa6874eedbdda6b3b5dc6ff9e1de36) Signed-off-by: Tom Graves --- conf/spark-env.sh.template | 1 + docs/monitoring.md | 4 ++++ docs/running-on-mesos.md | 2 ++ docs/spark-standalone.md | 4 ++++ .../org/apache/spark/launcher/SparkClassCommandBuilder.java | 5 +++++ 5 files changed, 16 insertions(+) diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template index b7c985ace69cf..0f9150b3e4e39 100755 --- a/conf/spark-env.sh.template +++ b/conf/spark-env.sh.template @@ -51,6 +51,7 @@ # - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y") # - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y") # - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y") +# - SPARK_DAEMON_CLASSPATH, to set the classpath for all daemons # - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers # Generic options for the daemons used in the standalone deploy mode diff --git a/docs/monitoring.md b/docs/monitoring.md index 3e577c5f36778..d22cd945eaf61 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -61,6 +61,10 @@ The history server can be configured as follows: SPARK_DAEMON_JAVA_OPTS JVM options for the history server (default: none). + + SPARK_DAEMON_CLASSPATH + Classpath for the history server (default: none). + SPARK_PUBLIC_DNS diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 847a6594569c6..6b69bfc6ef5e6 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -158,6 +158,8 @@ If you like to run the `MesosClusterDispatcher` with Marathon, you need to run t The `MesosClusterDispatcher` also supports writing recovery state into Zookeeper. This will allow the `MesosClusterDispatcher` to be able to recover all submitted and running containers on relaunch. In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env by configuring `spark.deploy.recoveryMode` and related spark.deploy.zookeeper.* configurations. For more information about these configurations please refer to the configurations [doc](configurations.html#deploy). +You can also specify any additional jars required by the `MesosClusterDispatcher` in the classpath by setting the environment variable SPARK_DAEMON_CLASSPATH in spark-env. 
+ From the client, you can submit a job to Mesos cluster by running `spark-submit` and specifying the master URL to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You can view driver statuses on the Spark cluster Web UI. diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 642575b46dd42..1095386c31ab8 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -149,6 +149,10 @@ You can optionally configure the cluster further by setting environment variable SPARK_DAEMON_JAVA_OPTS JVM options for the Spark master and worker daemons themselves in the form "-Dx=y" (default: none). + + SPARK_DAEMON_CLASSPATH + Classpath for the Spark master and worker daemons themselves (default: none). + SPARK_PUBLIC_DNS The public DNS name of the Spark master and workers (default: none). diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java index 137ef74843da5..32724acdc362c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java @@ -53,16 +53,19 @@ public List buildCommand(Map env) case "org.apache.spark.deploy.master.Master": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_MASTER_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; case "org.apache.spark.deploy.worker.Worker": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_WORKER_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; case "org.apache.spark.deploy.history.HistoryServer": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_HISTORY_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; case "org.apache.spark.executor.CoarseGrainedExecutorBackend": @@ -77,11 +80,13 @@ public List buildCommand(Map env) break; case "org.apache.spark.deploy.mesos.MesosClusterDispatcher": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); break; case "org.apache.spark.deploy.ExternalShuffleService": case "org.apache.spark.deploy.mesos.MesosExternalShuffleService": javaOptsKeys.add("SPARK_DAEMON_JAVA_OPTS"); javaOptsKeys.add("SPARK_SHUFFLE_OPTS"); + extraClassPath = getenv("SPARK_DAEMON_CLASSPATH"); memKey = "SPARK_DAEMON_MEMORY"; break; default: From 59529b21a99f3c4db16b31da9dc7fce62349cf11 Mon Sep 17 00:00:00 2001 From: jerryshao Date: Tue, 29 Aug 2017 10:50:03 -0700 Subject: [PATCH 1158/1204] [SPARK-21714][CORE][BACKPORT-2.2] Avoiding re-uploading remote resources in yarn client mode ## What changes were proposed in this pull request? This is a backport PR to fix issue of re-uploading remote resource in yarn client mode. The original PR is #18962. ## How was this patch tested? Tested in local UT. Author: jerryshao Closes #19074 from jerryshao/SPARK-21714-2.2-backport. 
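The mechanism, as implemented in the diff below: in client mode SparkSubmit downloads remote jars, files and py-files once for the driver-side classpath, but the original remote URIs are what end up in the `spark.yarn.dist.*` properties, so YARN distributes them directly instead of re-uploading the freshly downloaded local copies; the localized jars are handed to the REPL through the new internal property `spark.repl.local.jars`. The snippet below is illustration only (all paths and the application jar name are placeholders) and mirrors the expectations of the unit test added further down in this patch.

```
// Illustration only (paths are placeholders): for a submission such as
//   spark-submit --master yarn --deploy-mode client \
//     --jars s3a://bucket/dep.jar --files s3a://bucket/data.txt app.jar
// the system properties prepared by SparkSubmit are expected to look like
// this, matching the "Avoid re-upload remote resources in yarn client mode"
// test added below.
val expectedSysProps = Map(
  "spark.yarn.dist.jars"  -> "s3a://bucket/dep.jar",   // stays remote: YARN fetches it, no re-upload
  "spark.yarn.dist.files" -> "s3a://bucket/data.txt",  // stays remote
  "spark.repl.local.jars" -> "file:/tmp/.../dep.jar"   // downloaded copy for the driver/REPL classpath
)
```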
--- .../org/apache/spark/deploy/SparkSubmit.scala | 66 +++++++++++------- .../spark/internal/config/package.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 25 ++++--- .../spark/deploy/SparkSubmitSuite.scala | 68 +++++++++++++++---- .../scala/org/apache/spark/repl/Main.scala | 2 +- 5 files changed, 115 insertions(+), 48 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index c60a2a1706d5a..86d578e1ece72 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -208,14 +208,20 @@ object SparkSubmit extends CommandLineUtils { /** * Prepare the environment for submitting an application. - * This returns a 4-tuple: - * (1) the arguments for the child process, - * (2) a list of classpath entries for the child, - * (3) a map of system properties, and - * (4) the main class for the child + * + * @param args the parsed SparkSubmitArguments used for environment preparation. + * @param conf the Hadoop Configuration, this argument will only be set in unit test. + * @return a 4-tuple: + * (1) the arguments for the child process, + * (2) a list of classpath entries for the child, + * (3) a map of system properties, and + * (4) the main class for the child + * * Exposed for testing. */ - private[deploy] def prepareSubmitEnvironment(args: SparkSubmitArguments) + private[deploy] def prepareSubmitEnvironment( + args: SparkSubmitArguments, + conf: Option[HadoopConfiguration] = None) : (Seq[String], Seq[String], Map[String, String], String) = { // Return values val childArgs = new ArrayBuffer[String]() @@ -311,12 +317,16 @@ object SparkSubmit extends CommandLineUtils { } // In client mode, download remote files. + var localPrimaryResource: String = null + var localJars: String = null + var localPyFiles: String = null + var localFiles: String = null if (deployMode == CLIENT) { - val hadoopConf = new HadoopConfiguration() - args.primaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull - args.jars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull - args.pyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull - args.files = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull + val hadoopConf = conf.getOrElse(new HadoopConfiguration()) + localPrimaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull + localJars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull + localPyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull + localFiles = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull } // Require all python files to be local, so we can add them to the PYTHONPATH @@ -366,7 +376,7 @@ object SparkSubmit extends CommandLineUtils { // If a python file is provided, add it to the child arguments and list of files to deploy. // Usage: PythonAppRunner
    [app arguments] args.mainClass = "org.apache.spark.deploy.PythonRunner" - args.childArgs = ArrayBuffer(args.primaryResource, args.pyFiles) ++ args.childArgs + args.childArgs = ArrayBuffer(localPrimaryResource, localPyFiles) ++ args.childArgs if (clusterManager != YARN) { // The YARN backend distributes the primary file differently, so don't merge it. args.files = mergeFileLists(args.files, args.primaryResource) @@ -376,8 +386,8 @@ object SparkSubmit extends CommandLineUtils { // The YARN backend handles python files differently, so don't merge the lists. args.files = mergeFileLists(args.files, args.pyFiles) } - if (args.pyFiles != null) { - sysProps("spark.submit.pyFiles") = args.pyFiles + if (localPyFiles != null) { + sysProps("spark.submit.pyFiles") = localPyFiles } } @@ -431,7 +441,7 @@ object SparkSubmit extends CommandLineUtils { // If an R file is provided, add it to the child arguments and list of files to deploy. // Usage: RRunner
    [app arguments] args.mainClass = "org.apache.spark.deploy.RRunner" - args.childArgs = ArrayBuffer(args.primaryResource) ++ args.childArgs + args.childArgs = ArrayBuffer(localPrimaryResource) ++ args.childArgs args.files = mergeFileLists(args.files, args.primaryResource) } } @@ -468,6 +478,7 @@ object SparkSubmit extends CommandLineUtils { OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.queue"), OptionAssigner(args.numExecutors, YARN, ALL_DEPLOY_MODES, sysProp = "spark.executor.instances"), + OptionAssigner(args.pyFiles, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.pyFiles"), OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.jars"), OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.files"), OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.archives"), @@ -491,15 +502,28 @@ object SparkSubmit extends CommandLineUtils { sysProp = "spark.driver.cores"), OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER, sysProp = "spark.driver.supervise"), - OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy") + OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy"), + + // An internal option used only for spark-shell to add user jars to repl's classloader, + // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to + // remote jars, so adding a new option to only specify local jars for spark-shell internally. + OptionAssigner(localJars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.repl.local.jars") ) // In client mode, launch the application main class directly // In addition, add the main application jar and any added jars (if any) to the classpath - // Also add the main application jar and any added jars to classpath in case YARN client - // requires these jars. - if (deployMode == CLIENT || isYarnCluster) { + if (deployMode == CLIENT) { childMainClass = args.mainClass + if (localPrimaryResource != null && isUserJar(localPrimaryResource)) { + childClasspath += localPrimaryResource + } + if (localJars != null) { childClasspath ++= localJars.split(",") } + } + // Add the main application jar and any added jars to classpath in case YARN client + // requires these jars. + // This assumes both primaryResource and user jars are local jars, otherwise it will not be + // added to the classpath of YARN client. 
+ if (isYarnCluster) { if (isUserJar(args.primaryResource)) { childClasspath += args.primaryResource } @@ -556,10 +580,6 @@ object SparkSubmit extends CommandLineUtils { if (args.isPython) { sysProps.put("spark.yarn.isPython", "true") } - - if (args.pyFiles != null) { - sysProps("spark.submit.pyFiles") = args.pyFiles - } } // assure a keytab is available from any place in a JVM diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index c0fcf99fecd2a..1588dfec2074a 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -87,7 +87,7 @@ package object config { .intConf .createOptional - private[spark] val PY_FILES = ConfigBuilder("spark.submit.pyFiles") + private[spark] val PY_FILES = ConfigBuilder("spark.yarn.dist.pyFiles") .internal() .stringConf .toSequence diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 999486c3b6761..69c6c3327ee24 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2580,18 +2580,23 @@ private[spark] object Utils extends Logging { } /** - * In YARN mode this method returns a union of the jar files pointed by "spark.jars" and the - * "spark.yarn.dist.jars" properties, while in other modes it returns the jar files pointed by - * only the "spark.jars" property. + * Return the jar files pointed by the "spark.jars" property. Spark internally will distribute + * these jars through file server. In the YARN mode, it will return an empty list, since YARN + * has its own mechanism to distribute jars. */ - def getUserJars(conf: SparkConf, isShell: Boolean = false): Seq[String] = { + def getUserJars(conf: SparkConf): Seq[String] = { val sparkJars = conf.getOption("spark.jars") - if (conf.get("spark.master") == "yarn" && isShell) { - val yarnJars = conf.getOption("spark.yarn.dist.jars") - unionFileLists(sparkJars, yarnJars).toSeq - } else { - sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten - } + sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten + } + + /** + * Return the local jar files which will be added to REPL's classpath. These jar files are + * specified by --jars (spark.jars) or --packages, remote jars will be downloaded to local by + * SparkSubmit at first. 
+ */ + def getLocalUserJarsForShell(conf: SparkConf): Seq[String] = { + val localJars = conf.getOption("spark.repl.local.jars") + localJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten } private[spark] val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 6fa3a09b2ef1e..3c49b1fcd0be1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -27,7 +27,7 @@ import scala.io.Source import com.google.common.io.ByteStreams import org.apache.commons.io.{FilenameUtils, FileUtils} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} import org.scalatest.{BeforeAndAfterEach, Matchers} import org.scalatest.concurrent.Timeouts import org.scalatest.time.SpanSugar._ @@ -738,10 +738,7 @@ class SparkSubmitSuite test("downloadFile - file doesn't exist") { val hadoopConf = new Configuration() - // Set s3a implementation to local file system for testing. - hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") - // Disable file system impl cache to make sure the test file system is picked up. - hadoopConf.set("fs.s3a.impl.disable.cache", "true") + updateConfWithFakeS3Fs(hadoopConf) intercept[FileNotFoundException] { SparkSubmit.downloadFile("s3a:/no/such/file", hadoopConf) } @@ -759,10 +756,7 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() - // Set s3a implementation to local file system for testing. - hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") - // Disable file system impl cache to make sure the test file system is picked up. - hadoopConf.set("fs.s3a.impl.disable.cache", "true") + updateConfWithFakeS3Fs(hadoopConf) val sourcePath = s"s3a://${jarFile.getAbsolutePath}" val outputPath = SparkSubmit.downloadFile(sourcePath, hadoopConf) checkDownloadedFile(sourcePath, outputPath) @@ -775,10 +769,7 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() - // Set s3a implementation to local file system for testing. - hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") - // Disable file system impl cache to make sure the test file system is picked up. 
- hadoopConf.set("fs.s3a.impl.disable.cache", "true") + updateConfWithFakeS3Fs(hadoopConf) val sourcePaths = Seq("/local/file", s"s3a://${jarFile.getAbsolutePath}") val outputPaths = SparkSubmit.downloadFileList(sourcePaths.mkString(","), hadoopConf).split(",") @@ -789,6 +780,43 @@ class SparkSubmitSuite } } + test("Avoid re-upload remote resources in yarn client mode") { + val hadoopConf = new Configuration() + updateConfWithFakeS3Fs(hadoopConf) + + val tmpDir = Utils.createTempDir() + val file = File.createTempFile("tmpFile", "", tmpDir) + val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) + val mainResource = File.createTempFile("tmpPy", ".py", tmpDir) + val tmpJar = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpDir) + val tmpJarPath = s"s3a://${new File(tmpJar.toURI).getAbsolutePath}" + + val args = Seq( + "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--jars", tmpJarPath, + "--files", s"s3a://${file.getAbsolutePath}", + "--py-files", s"s3a://${pyFile.getAbsolutePath}", + s"s3a://$mainResource" + ) + + val appArgs = new SparkSubmitArguments(args) + val sysProps = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf))._3 + + // All the resources should still be remote paths, so that YARN client will not upload again. + sysProps("spark.yarn.dist.jars") should be (tmpJarPath) + sysProps("spark.yarn.dist.files") should be (s"s3a://${file.getAbsolutePath}") + sysProps("spark.yarn.dist.pyFiles") should be (s"s3a://${pyFile.getAbsolutePath}") + + // Local repl jars should be a local path. + sysProps("spark.repl.local.jars") should (startWith("file:")) + + // local py files should not be a URI format. + sysProps("spark.submit.pyFiles") should (startWith("/")) + } + // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. private def runSparkSubmit(args: Seq[String]): Unit = { val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")) @@ -828,6 +856,11 @@ class SparkSubmitSuite Utils.deleteRecursively(tmpDir) } } + + private def updateConfWithFakeS3Fs(conf: Configuration): Unit = { + conf.set("fs.s3a.impl", classOf[TestFileSystem].getCanonicalName) + conf.set("fs.s3a.impl.disable.cache", "true") + } } object JarCreationTest extends Logging { @@ -897,4 +930,13 @@ class TestFileSystem extends org.apache.hadoop.fs.LocalFileSystem { // Ignore the scheme for testing. 
super.copyToLocalFile(new Path(src.toUri.getPath), dst) } + + override def globStatus(pathPattern: Path): Array[FileStatus] = { + val newPath = new Path(pathPattern.toUri.getPath) + super.globStatus(newPath).map { status => + val path = s"s3a://${status.getPath.toUri.getPath}" + status.setPath(new Path(path)) + status + } + } } diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala index 9702a1e653c32..0b16e1b073e32 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala @@ -57,7 +57,7 @@ object Main extends Logging { // Visible for testing private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = { interp = _interp - val jars = Utils.getUserJars(conf, isShell = true) + val jars = Utils.getLocalUserJarsForShell(conf) // Remove file:///, file:// or file:/ scheme if exists for each jar .map { x => if (x.startsWith("file:")) new File(new URI(x)).getPath else x } .mkString(File.pathSeparator) From 917fe6635891ea76b22a3bcba282040afd14651d Mon Sep 17 00:00:00 2001 From: Marcelo Vanzin Date: Tue, 29 Aug 2017 12:51:27 -0700 Subject: [PATCH 1159/1204] Revert "[SPARK-21714][CORE][BACKPORT-2.2] Avoiding re-uploading remote resources in yarn client mode" This reverts commit 59529b21a99f3c4db16b31da9dc7fce62349cf11. --- .../org/apache/spark/deploy/SparkSubmit.scala | 66 +++++++----------- .../spark/internal/config/package.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 25 +++---- .../spark/deploy/SparkSubmitSuite.scala | 68 ++++--------------- .../scala/org/apache/spark/repl/Main.scala | 2 +- 5 files changed, 48 insertions(+), 115 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 86d578e1ece72..c60a2a1706d5a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -208,20 +208,14 @@ object SparkSubmit extends CommandLineUtils { /** * Prepare the environment for submitting an application. - * - * @param args the parsed SparkSubmitArguments used for environment preparation. - * @param conf the Hadoop Configuration, this argument will only be set in unit test. - * @return a 4-tuple: - * (1) the arguments for the child process, - * (2) a list of classpath entries for the child, - * (3) a map of system properties, and - * (4) the main class for the child - * + * This returns a 4-tuple: + * (1) the arguments for the child process, + * (2) a list of classpath entries for the child, + * (3) a map of system properties, and + * (4) the main class for the child * Exposed for testing. */ - private[deploy] def prepareSubmitEnvironment( - args: SparkSubmitArguments, - conf: Option[HadoopConfiguration] = None) + private[deploy] def prepareSubmitEnvironment(args: SparkSubmitArguments) : (Seq[String], Seq[String], Map[String, String], String) = { // Return values val childArgs = new ArrayBuffer[String]() @@ -317,16 +311,12 @@ object SparkSubmit extends CommandLineUtils { } // In client mode, download remote files. 
- var localPrimaryResource: String = null - var localJars: String = null - var localPyFiles: String = null - var localFiles: String = null if (deployMode == CLIENT) { - val hadoopConf = conf.getOrElse(new HadoopConfiguration()) - localPrimaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull - localJars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull - localPyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull - localFiles = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull + val hadoopConf = new HadoopConfiguration() + args.primaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull + args.jars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull + args.pyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull + args.files = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull } // Require all python files to be local, so we can add them to the PYTHONPATH @@ -376,7 +366,7 @@ object SparkSubmit extends CommandLineUtils { // If a python file is provided, add it to the child arguments and list of files to deploy. // Usage: PythonAppRunner
    [app arguments] args.mainClass = "org.apache.spark.deploy.PythonRunner" - args.childArgs = ArrayBuffer(localPrimaryResource, localPyFiles) ++ args.childArgs + args.childArgs = ArrayBuffer(args.primaryResource, args.pyFiles) ++ args.childArgs if (clusterManager != YARN) { // The YARN backend distributes the primary file differently, so don't merge it. args.files = mergeFileLists(args.files, args.primaryResource) @@ -386,8 +376,8 @@ object SparkSubmit extends CommandLineUtils { // The YARN backend handles python files differently, so don't merge the lists. args.files = mergeFileLists(args.files, args.pyFiles) } - if (localPyFiles != null) { - sysProps("spark.submit.pyFiles") = localPyFiles + if (args.pyFiles != null) { + sysProps("spark.submit.pyFiles") = args.pyFiles } } @@ -441,7 +431,7 @@ object SparkSubmit extends CommandLineUtils { // If an R file is provided, add it to the child arguments and list of files to deploy. // Usage: RRunner
    [app arguments] args.mainClass = "org.apache.spark.deploy.RRunner" - args.childArgs = ArrayBuffer(localPrimaryResource) ++ args.childArgs + args.childArgs = ArrayBuffer(args.primaryResource) ++ args.childArgs args.files = mergeFileLists(args.files, args.primaryResource) } } @@ -478,7 +468,6 @@ object SparkSubmit extends CommandLineUtils { OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.queue"), OptionAssigner(args.numExecutors, YARN, ALL_DEPLOY_MODES, sysProp = "spark.executor.instances"), - OptionAssigner(args.pyFiles, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.pyFiles"), OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.jars"), OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.files"), OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.archives"), @@ -502,28 +491,15 @@ object SparkSubmit extends CommandLineUtils { sysProp = "spark.driver.cores"), OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER, sysProp = "spark.driver.supervise"), - OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy"), - - // An internal option used only for spark-shell to add user jars to repl's classloader, - // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to - // remote jars, so adding a new option to only specify local jars for spark-shell internally. - OptionAssigner(localJars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.repl.local.jars") + OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy") ) // In client mode, launch the application main class directly // In addition, add the main application jar and any added jars (if any) to the classpath - if (deployMode == CLIENT) { - childMainClass = args.mainClass - if (localPrimaryResource != null && isUserJar(localPrimaryResource)) { - childClasspath += localPrimaryResource - } - if (localJars != null) { childClasspath ++= localJars.split(",") } - } - // Add the main application jar and any added jars to classpath in case YARN client + // Also add the main application jar and any added jars to classpath in case YARN client // requires these jars. - // This assumes both primaryResource and user jars are local jars, otherwise it will not be - // added to the classpath of YARN client. 
- if (isYarnCluster) { + if (deployMode == CLIENT || isYarnCluster) { + childMainClass = args.mainClass if (isUserJar(args.primaryResource)) { childClasspath += args.primaryResource } @@ -580,6 +556,10 @@ object SparkSubmit extends CommandLineUtils { if (args.isPython) { sysProps.put("spark.yarn.isPython", "true") } + + if (args.pyFiles != null) { + sysProps("spark.submit.pyFiles") = args.pyFiles + } } // assure a keytab is available from any place in a JVM diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 1588dfec2074a..c0fcf99fecd2a 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -87,7 +87,7 @@ package object config { .intConf .createOptional - private[spark] val PY_FILES = ConfigBuilder("spark.yarn.dist.pyFiles") + private[spark] val PY_FILES = ConfigBuilder("spark.submit.pyFiles") .internal() .stringConf .toSequence diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 69c6c3327ee24..999486c3b6761 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2580,23 +2580,18 @@ private[spark] object Utils extends Logging { } /** - * Return the jar files pointed by the "spark.jars" property. Spark internally will distribute - * these jars through file server. In the YARN mode, it will return an empty list, since YARN - * has its own mechanism to distribute jars. + * In YARN mode this method returns a union of the jar files pointed by "spark.jars" and the + * "spark.yarn.dist.jars" properties, while in other modes it returns the jar files pointed by + * only the "spark.jars" property. */ - def getUserJars(conf: SparkConf): Seq[String] = { + def getUserJars(conf: SparkConf, isShell: Boolean = false): Seq[String] = { val sparkJars = conf.getOption("spark.jars") - sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten - } - - /** - * Return the local jar files which will be added to REPL's classpath. These jar files are - * specified by --jars (spark.jars) or --packages, remote jars will be downloaded to local by - * SparkSubmit at first. 
- */ - def getLocalUserJarsForShell(conf: SparkConf): Seq[String] = { - val localJars = conf.getOption("spark.repl.local.jars") - localJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten + if (conf.get("spark.master") == "yarn" && isShell) { + val yarnJars = conf.getOption("spark.yarn.dist.jars") + unionFileLists(sparkJars, yarnJars).toSeq + } else { + sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten + } } private[spark] val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 3c49b1fcd0be1..6fa3a09b2ef1e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -27,7 +27,7 @@ import scala.io.Source import com.google.common.io.ByteStreams import org.apache.commons.io.{FilenameUtils, FileUtils} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} +import org.apache.hadoop.fs.Path import org.scalatest.{BeforeAndAfterEach, Matchers} import org.scalatest.concurrent.Timeouts import org.scalatest.time.SpanSugar._ @@ -738,7 +738,10 @@ class SparkSubmitSuite test("downloadFile - file doesn't exist") { val hadoopConf = new Configuration() - updateConfWithFakeS3Fs(hadoopConf) + // Set s3a implementation to local file system for testing. + hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") + // Disable file system impl cache to make sure the test file system is picked up. + hadoopConf.set("fs.s3a.impl.disable.cache", "true") intercept[FileNotFoundException] { SparkSubmit.downloadFile("s3a:/no/such/file", hadoopConf) } @@ -756,7 +759,10 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() - updateConfWithFakeS3Fs(hadoopConf) + // Set s3a implementation to local file system for testing. + hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") + // Disable file system impl cache to make sure the test file system is picked up. + hadoopConf.set("fs.s3a.impl.disable.cache", "true") val sourcePath = s"s3a://${jarFile.getAbsolutePath}" val outputPath = SparkSubmit.downloadFile(sourcePath, hadoopConf) checkDownloadedFile(sourcePath, outputPath) @@ -769,7 +775,10 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() - updateConfWithFakeS3Fs(hadoopConf) + // Set s3a implementation to local file system for testing. + hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") + // Disable file system impl cache to make sure the test file system is picked up. 
+ hadoopConf.set("fs.s3a.impl.disable.cache", "true") val sourcePaths = Seq("/local/file", s"s3a://${jarFile.getAbsolutePath}") val outputPaths = SparkSubmit.downloadFileList(sourcePaths.mkString(","), hadoopConf).split(",") @@ -780,43 +789,6 @@ class SparkSubmitSuite } } - test("Avoid re-upload remote resources in yarn client mode") { - val hadoopConf = new Configuration() - updateConfWithFakeS3Fs(hadoopConf) - - val tmpDir = Utils.createTempDir() - val file = File.createTempFile("tmpFile", "", tmpDir) - val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) - val mainResource = File.createTempFile("tmpPy", ".py", tmpDir) - val tmpJar = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpDir) - val tmpJarPath = s"s3a://${new File(tmpJar.toURI).getAbsolutePath}" - - val args = Seq( - "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), - "--name", "testApp", - "--master", "yarn", - "--deploy-mode", "client", - "--jars", tmpJarPath, - "--files", s"s3a://${file.getAbsolutePath}", - "--py-files", s"s3a://${pyFile.getAbsolutePath}", - s"s3a://$mainResource" - ) - - val appArgs = new SparkSubmitArguments(args) - val sysProps = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf))._3 - - // All the resources should still be remote paths, so that YARN client will not upload again. - sysProps("spark.yarn.dist.jars") should be (tmpJarPath) - sysProps("spark.yarn.dist.files") should be (s"s3a://${file.getAbsolutePath}") - sysProps("spark.yarn.dist.pyFiles") should be (s"s3a://${pyFile.getAbsolutePath}") - - // Local repl jars should be a local path. - sysProps("spark.repl.local.jars") should (startWith("file:")) - - // local py files should not be a URI format. - sysProps("spark.submit.pyFiles") should (startWith("/")) - } - // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. private def runSparkSubmit(args: Seq[String]): Unit = { val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")) @@ -856,11 +828,6 @@ class SparkSubmitSuite Utils.deleteRecursively(tmpDir) } } - - private def updateConfWithFakeS3Fs(conf: Configuration): Unit = { - conf.set("fs.s3a.impl", classOf[TestFileSystem].getCanonicalName) - conf.set("fs.s3a.impl.disable.cache", "true") - } } object JarCreationTest extends Logging { @@ -930,13 +897,4 @@ class TestFileSystem extends org.apache.hadoop.fs.LocalFileSystem { // Ignore the scheme for testing. 
super.copyToLocalFile(new Path(src.toUri.getPath), dst) } - - override def globStatus(pathPattern: Path): Array[FileStatus] = { - val newPath = new Path(pathPattern.toUri.getPath) - super.globStatus(newPath).map { status => - val path = s"s3a://${status.getPath.toUri.getPath}" - status.setPath(new Path(path)) - status - } - } } diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala index 0b16e1b073e32..9702a1e653c32 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala @@ -57,7 +57,7 @@ object Main extends Logging { // Visible for testing private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = { interp = _interp - val jars = Utils.getLocalUserJarsForShell(conf) + val jars = Utils.getUserJars(conf, isShell = true) // Remove file:///, file:// or file:/ scheme if exists for each jar .map { x => if (x.startsWith("file:")) new File(new URI(x)).getPath else x } .mkString(File.pathSeparator) From a6a9944140bbb336146d0d868429cb01839375c7 Mon Sep 17 00:00:00 2001 From: Dmitry Parfenchik Date: Wed, 30 Aug 2017 09:42:15 +0100 Subject: [PATCH 1160/1204] [SPARK-21254][WEBUI] History UI performance fixes ## This is a backport of PR #18783 to the latest released branch 2.2. ## What changes were proposed in this pull request? As described in JIRA ticket, History page is taking ~1min to load for cases when amount of jobs is 10k+. Most of the time is currently being spent on DOM manipulations and all additional costs implied by this (browser repaints and reflows). PR's goal is not to change any behavior but to optimize time of History UI rendering: 1. The most costly operation is setting `innerHTML` for `duration` column within a loop, which is [extremely unperformant](https://jsperf.com/jquery-append-vs-html-list-performance/24). [Refactoring ](https://github.com/criteo-forks/spark/commit/b7e56eef4d66af977bd05af58a81e14faf33c211) this helped to get page load time **down to 10-15s** 2. Second big gain bringing page load time **down to 4s** was [was achieved](https://github.com/criteo-forks/spark/commit/3630ca212baa94d60c5fe7e4109cf6da26288cec) by detaching table's DOM before parsing it with DataTables jQuery plugin. 3. Another chunk of improvements ([1]https://github.com/criteo-forks/spark/commit/aeeeeb520d156a7293a707aa6bc053a2f83b9ac2), [2](https://github.com/criteo-forks/spark/commit/e25be9a66b018ba0cc53884f242469b515cb2bf4), [3](https://github.com/criteo-forks/spark/commit/91697079a29138b7581e64f2aa79247fa1a4e4af)) was focused on removing unnecessary DOM manipulations that in total contributed ~250ms to page load time. ## How was this patch tested? Tested by existing Selenium tests in `org.apache.spark.deploy.history.HistoryServerSuite`. Changes were also tested on Criteo's spark-2.1 fork with 20k+ number of rows in the table, reducing load time to 4s. Author: Dmitry Parfenchik Closes #18860 from 2ooom/history-ui-perf-fix-2.2. 
--- .../spark/ui/static/historypage-template.html | 22 ++-- .../org/apache/spark/ui/static/historypage.js | 112 +++++++++--------- 2 files changed, 71 insertions(+), 63 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html index bfe31aae555ba..20cd7bfdb2234 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage-template.html @@ -29,21 +29,25 @@ App Name - + {{#hasMultipleAttempts}} + Attempt ID + {{/hasMultipleAttempts}} Started - + {{#showCompletedColumn}} + Completed + {{/showCompletedColumn}} Duration @@ -68,13 +72,17 @@ {{#applications}} - {{id}} - {{name}} + {{id}} + {{name}} {{#attempts}} - {{attemptId}} + {{#hasMultipleAttempts}} + {{attemptId}} + {{/hasMultipleAttempts}} {{startTime}} - {{endTime}} - {{duration}} + {{#showCompletedColumn}} + {{endTime}} + {{/showCompletedColumn}} + {{duration}} {{sparkUser}} {{lastUpdated}} Download diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 5ec1ce15a2127..3e2bba8a89418 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -48,6 +48,18 @@ function getParameterByName(name, searchString) { return results === null ? "" : decodeURIComponent(results[1].replace(/\+/g, " ")); } +function removeColumnByName(columns, columnName) { + return columns.filter(function(col) {return col.name != columnName}) +} + +function getColumnIndex(columns, columnName) { + for(var i = 0; i < columns.length; i++) { + if (columns[i].name == columnName) + return i; + } + return -1; +} + jQuery.extend( jQuery.fn.dataTableExt.oSort, { "title-numeric-pre": function ( a ) { var x = a.match(/title="*(-?[0-9\.]+)/)[1]; @@ -122,79 +134,67 @@ $(document).ready(function() { attempt["lastUpdated"] = formatDate(attempt["lastUpdated"]); attempt["log"] = uiRoot + "/api/v1/applications/" + id + "/" + (attempt.hasOwnProperty("attemptId") ? 
attempt["attemptId"] + "/" : "") + "logs"; - + attempt["durationMillisec"] = attempt["duration"]; + attempt["duration"] = formatDuration(attempt["duration"]); var app_clone = {"id" : id, "name" : name, "num" : num, "attempts" : [attempt]}; array.push(app_clone); } } + if(array.length < 20) { + $.fn.dataTable.defaults.paging = false; + } var data = { "uiroot": uiRoot, - "applications": array - } + "applications": array, + "hasMultipleAttempts": hasMultipleAttempts, + "showCompletedColumn": !requestedIncomplete, + } $.get("static/historypage-template.html", function(template) { - historySummary.append(Mustache.render($(template).filter("#history-summary-template").html(),data)); - var selector = "#history-summary-table"; + var sibling = historySummary.prev(); + historySummary.detach(); + var apps = $(Mustache.render($(template).filter("#history-summary-template").html(),data)); + var attemptIdColumnName = 'attemptId'; + var startedColumnName = 'started'; + var defaultSortColumn = completedColumnName = 'completed'; + var durationColumnName = 'duration'; var conf = { - "columns": [ - {name: 'first', type: "appid-numeric"}, - {name: 'second'}, - {name: 'third'}, - {name: 'fourth'}, - {name: 'fifth'}, - {name: 'sixth', type: "title-numeric"}, - {name: 'seventh'}, - {name: 'eighth'}, - {name: 'ninth'}, - ], - "columnDefs": [ - {"searchable": false, "targets": [5]} - ], - "autoWidth": false, - "order": [[ 4, "desc" ]] - }; - - var rowGroupConf = { - "rowsGroup": [ - 'first:name', - 'second:name' - ], + "columns": [ + {name: 'appId', type: "appid-numeric"}, + {name: 'appName'}, + {name: attemptIdColumnName}, + {name: startedColumnName}, + {name: completedColumnName}, + {name: durationColumnName, type: "title-numeric"}, + {name: 'user'}, + {name: 'lastUpdated'}, + {name: 'eventLog'}, + ], + "autoWidth": false, }; if (hasMultipleAttempts) { - jQuery.extend(conf, rowGroupConf); - var rowGroupCells = document.getElementsByClassName("rowGroupColumn"); - for (i = 0; i < rowGroupCells.length; i++) { - rowGroupCells[i].style='background-color: #ffffff'; - } - } - - if (!hasMultipleAttempts) { - var attemptIDCells = document.getElementsByClassName("attemptIDSpan"); - for (i = 0; i < attemptIDCells.length; i++) { - attemptIDCells[i].style.display='none'; - } + conf.rowsGroup = [ + 'appId:name', + 'appName:name' + ]; + } else { + conf.columns = removeColumnByName(conf.columns, attemptIdColumnName); } + var defaultSortColumn = completedColumnName; if (requestedIncomplete) { - var completedCells = document.getElementsByClassName("completedColumn"); - for (i = 0; i < completedCells.length; i++) { - completedCells[i].style.display='none'; - } + defaultSortColumn = startedColumnName; + conf.columns = removeColumnByName(conf.columns, completedColumnName); } - - var durationCells = document.getElementsByClassName("durationClass"); - for (i = 0; i < durationCells.length; i++) { - var timeInMilliseconds = parseInt(durationCells[i].title); - durationCells[i].innerHTML = formatDuration(timeInMilliseconds); - } - - if ($(selector.concat(" tr")).length < 20) { - $.extend(conf, {paging: false}); - } - - $(selector).DataTable(conf); + conf.order = [[ getColumnIndex(conf.columns, defaultSortColumn), "desc" ]]; + conf.columnDefs = [ + {"searchable": false, "targets": [getColumnIndex(conf.columns, durationColumnName)]} + ]; + historySummary.append(apps); + apps.DataTable(conf); + sibling.after(historySummary); $('#history-summary [data-toggle="tooltip"]').tooltip(); }); }); From d10c9dc3f631a26dbbbd8f5c601ca2001a5d7c80 Mon Sep 
17 00:00:00 2001 From: jerryshao Date: Wed, 30 Aug 2017 12:30:24 -0700 Subject: [PATCH 1161/1204] [SPARK-21714][CORE][BACKPORT-2.2] Avoiding re-uploading remote resources in yarn client mode ## What changes were proposed in this pull request? This is a backport PR to fix issue of re-uploading remote resource in yarn client mode. The original PR is #18962. ## How was this patch tested? Tested in local UT. Author: jerryshao Closes #19074 from jerryshao/SPARK-21714-2.2-backport. --- .../org/apache/spark/deploy/SparkSubmit.scala | 66 +++++++++++------- .../spark/internal/config/package.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 25 ++++--- .../spark/deploy/SparkSubmitSuite.scala | 68 +++++++++++++++---- .../org/apache/spark/repl/SparkILoop.scala | 2 +- .../scala/org/apache/spark/repl/Main.scala | 2 +- 6 files changed, 116 insertions(+), 49 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index c60a2a1706d5a..86d578e1ece72 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -208,14 +208,20 @@ object SparkSubmit extends CommandLineUtils { /** * Prepare the environment for submitting an application. - * This returns a 4-tuple: - * (1) the arguments for the child process, - * (2) a list of classpath entries for the child, - * (3) a map of system properties, and - * (4) the main class for the child + * + * @param args the parsed SparkSubmitArguments used for environment preparation. + * @param conf the Hadoop Configuration, this argument will only be set in unit test. + * @return a 4-tuple: + * (1) the arguments for the child process, + * (2) a list of classpath entries for the child, + * (3) a map of system properties, and + * (4) the main class for the child + * * Exposed for testing. */ - private[deploy] def prepareSubmitEnvironment(args: SparkSubmitArguments) + private[deploy] def prepareSubmitEnvironment( + args: SparkSubmitArguments, + conf: Option[HadoopConfiguration] = None) : (Seq[String], Seq[String], Map[String, String], String) = { // Return values val childArgs = new ArrayBuffer[String]() @@ -311,12 +317,16 @@ object SparkSubmit extends CommandLineUtils { } // In client mode, download remote files. + var localPrimaryResource: String = null + var localJars: String = null + var localPyFiles: String = null + var localFiles: String = null if (deployMode == CLIENT) { - val hadoopConf = new HadoopConfiguration() - args.primaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull - args.jars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull - args.pyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull - args.files = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull + val hadoopConf = conf.getOrElse(new HadoopConfiguration()) + localPrimaryResource = Option(args.primaryResource).map(downloadFile(_, hadoopConf)).orNull + localJars = Option(args.jars).map(downloadFileList(_, hadoopConf)).orNull + localPyFiles = Option(args.pyFiles).map(downloadFileList(_, hadoopConf)).orNull + localFiles = Option(args.files).map(downloadFileList(_, hadoopConf)).orNull } // Require all python files to be local, so we can add them to the PYTHONPATH @@ -366,7 +376,7 @@ object SparkSubmit extends CommandLineUtils { // If a python file is provided, add it to the child arguments and list of files to deploy. // Usage: PythonAppRunner
    [app arguments] args.mainClass = "org.apache.spark.deploy.PythonRunner" - args.childArgs = ArrayBuffer(args.primaryResource, args.pyFiles) ++ args.childArgs + args.childArgs = ArrayBuffer(localPrimaryResource, localPyFiles) ++ args.childArgs if (clusterManager != YARN) { // The YARN backend distributes the primary file differently, so don't merge it. args.files = mergeFileLists(args.files, args.primaryResource) @@ -376,8 +386,8 @@ object SparkSubmit extends CommandLineUtils { // The YARN backend handles python files differently, so don't merge the lists. args.files = mergeFileLists(args.files, args.pyFiles) } - if (args.pyFiles != null) { - sysProps("spark.submit.pyFiles") = args.pyFiles + if (localPyFiles != null) { + sysProps("spark.submit.pyFiles") = localPyFiles } } @@ -431,7 +441,7 @@ object SparkSubmit extends CommandLineUtils { // If an R file is provided, add it to the child arguments and list of files to deploy. // Usage: RRunner
    [app arguments] args.mainClass = "org.apache.spark.deploy.RRunner" - args.childArgs = ArrayBuffer(args.primaryResource) ++ args.childArgs + args.childArgs = ArrayBuffer(localPrimaryResource) ++ args.childArgs args.files = mergeFileLists(args.files, args.primaryResource) } } @@ -468,6 +478,7 @@ object SparkSubmit extends CommandLineUtils { OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.queue"), OptionAssigner(args.numExecutors, YARN, ALL_DEPLOY_MODES, sysProp = "spark.executor.instances"), + OptionAssigner(args.pyFiles, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.pyFiles"), OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.jars"), OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.files"), OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.archives"), @@ -491,15 +502,28 @@ object SparkSubmit extends CommandLineUtils { sysProp = "spark.driver.cores"), OptionAssigner(args.supervise.toString, STANDALONE | MESOS, CLUSTER, sysProp = "spark.driver.supervise"), - OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy") + OptionAssigner(args.ivyRepoPath, STANDALONE, CLUSTER, sysProp = "spark.jars.ivy"), + + // An internal option used only for spark-shell to add user jars to repl's classloader, + // previously it uses "spark.jars" or "spark.yarn.dist.jars" which now may be pointed to + // remote jars, so adding a new option to only specify local jars for spark-shell internally. + OptionAssigner(localJars, ALL_CLUSTER_MGRS, CLIENT, sysProp = "spark.repl.local.jars") ) // In client mode, launch the application main class directly // In addition, add the main application jar and any added jars (if any) to the classpath - // Also add the main application jar and any added jars to classpath in case YARN client - // requires these jars. - if (deployMode == CLIENT || isYarnCluster) { + if (deployMode == CLIENT) { childMainClass = args.mainClass + if (localPrimaryResource != null && isUserJar(localPrimaryResource)) { + childClasspath += localPrimaryResource + } + if (localJars != null) { childClasspath ++= localJars.split(",") } + } + // Add the main application jar and any added jars to classpath in case YARN client + // requires these jars. + // This assumes both primaryResource and user jars are local jars, otherwise it will not be + // added to the classpath of YARN client. 
+ if (isYarnCluster) { if (isUserJar(args.primaryResource)) { childClasspath += args.primaryResource } @@ -556,10 +580,6 @@ object SparkSubmit extends CommandLineUtils { if (args.isPython) { sysProps.put("spark.yarn.isPython", "true") } - - if (args.pyFiles != null) { - sysProps("spark.submit.pyFiles") = args.pyFiles - } } // assure a keytab is available from any place in a JVM diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index c0fcf99fecd2a..1588dfec2074a 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -87,7 +87,7 @@ package object config { .intConf .createOptional - private[spark] val PY_FILES = ConfigBuilder("spark.submit.pyFiles") + private[spark] val PY_FILES = ConfigBuilder("spark.yarn.dist.pyFiles") .internal() .stringConf .toSequence diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 999486c3b6761..69c6c3327ee24 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2580,18 +2580,23 @@ private[spark] object Utils extends Logging { } /** - * In YARN mode this method returns a union of the jar files pointed by "spark.jars" and the - * "spark.yarn.dist.jars" properties, while in other modes it returns the jar files pointed by - * only the "spark.jars" property. + * Return the jar files pointed by the "spark.jars" property. Spark internally will distribute + * these jars through file server. In the YARN mode, it will return an empty list, since YARN + * has its own mechanism to distribute jars. */ - def getUserJars(conf: SparkConf, isShell: Boolean = false): Seq[String] = { + def getUserJars(conf: SparkConf): Seq[String] = { val sparkJars = conf.getOption("spark.jars") - if (conf.get("spark.master") == "yarn" && isShell) { - val yarnJars = conf.getOption("spark.yarn.dist.jars") - unionFileLists(sparkJars, yarnJars).toSeq - } else { - sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten - } + sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten + } + + /** + * Return the local jar files which will be added to REPL's classpath. These jar files are + * specified by --jars (spark.jars) or --packages, remote jars will be downloaded to local by + * SparkSubmit at first. 
+ */ + def getLocalUserJarsForShell(conf: SparkConf): Seq[String] = { + val localJars = conf.getOption("spark.repl.local.jars") + localJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten } private[spark] val REDACTION_REPLACEMENT_TEXT = "*********(redacted)" diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 6fa3a09b2ef1e..3c49b1fcd0be1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -27,7 +27,7 @@ import scala.io.Source import com.google.common.io.ByteStreams import org.apache.commons.io.{FilenameUtils, FileUtils} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} import org.scalatest.{BeforeAndAfterEach, Matchers} import org.scalatest.concurrent.Timeouts import org.scalatest.time.SpanSugar._ @@ -738,10 +738,7 @@ class SparkSubmitSuite test("downloadFile - file doesn't exist") { val hadoopConf = new Configuration() - // Set s3a implementation to local file system for testing. - hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") - // Disable file system impl cache to make sure the test file system is picked up. - hadoopConf.set("fs.s3a.impl.disable.cache", "true") + updateConfWithFakeS3Fs(hadoopConf) intercept[FileNotFoundException] { SparkSubmit.downloadFile("s3a:/no/such/file", hadoopConf) } @@ -759,10 +756,7 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() - // Set s3a implementation to local file system for testing. - hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") - // Disable file system impl cache to make sure the test file system is picked up. - hadoopConf.set("fs.s3a.impl.disable.cache", "true") + updateConfWithFakeS3Fs(hadoopConf) val sourcePath = s"s3a://${jarFile.getAbsolutePath}" val outputPath = SparkSubmit.downloadFile(sourcePath, hadoopConf) checkDownloadedFile(sourcePath, outputPath) @@ -775,10 +769,7 @@ class SparkSubmitSuite val content = "hello, world" FileUtils.write(jarFile, content) val hadoopConf = new Configuration() - // Set s3a implementation to local file system for testing. - hadoopConf.set("fs.s3a.impl", "org.apache.spark.deploy.TestFileSystem") - // Disable file system impl cache to make sure the test file system is picked up. 
- hadoopConf.set("fs.s3a.impl.disable.cache", "true") + updateConfWithFakeS3Fs(hadoopConf) val sourcePaths = Seq("/local/file", s"s3a://${jarFile.getAbsolutePath}") val outputPaths = SparkSubmit.downloadFileList(sourcePaths.mkString(","), hadoopConf).split(",") @@ -789,6 +780,43 @@ class SparkSubmitSuite } } + test("Avoid re-upload remote resources in yarn client mode") { + val hadoopConf = new Configuration() + updateConfWithFakeS3Fs(hadoopConf) + + val tmpDir = Utils.createTempDir() + val file = File.createTempFile("tmpFile", "", tmpDir) + val pyFile = File.createTempFile("tmpPy", ".egg", tmpDir) + val mainResource = File.createTempFile("tmpPy", ".py", tmpDir) + val tmpJar = TestUtils.createJarWithFiles(Map("test.resource" -> "USER"), tmpDir) + val tmpJarPath = s"s3a://${new File(tmpJar.toURI).getAbsolutePath}" + + val args = Seq( + "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"), + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", "client", + "--jars", tmpJarPath, + "--files", s"s3a://${file.getAbsolutePath}", + "--py-files", s"s3a://${pyFile.getAbsolutePath}", + s"s3a://$mainResource" + ) + + val appArgs = new SparkSubmitArguments(args) + val sysProps = SparkSubmit.prepareSubmitEnvironment(appArgs, Some(hadoopConf))._3 + + // All the resources should still be remote paths, so that YARN client will not upload again. + sysProps("spark.yarn.dist.jars") should be (tmpJarPath) + sysProps("spark.yarn.dist.files") should be (s"s3a://${file.getAbsolutePath}") + sysProps("spark.yarn.dist.pyFiles") should be (s"s3a://${pyFile.getAbsolutePath}") + + // Local repl jars should be a local path. + sysProps("spark.repl.local.jars") should (startWith("file:")) + + // local py files should not be a URI format. + sysProps("spark.submit.pyFiles") should (startWith("/")) + } + // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. private def runSparkSubmit(args: Seq[String]): Unit = { val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")) @@ -828,6 +856,11 @@ class SparkSubmitSuite Utils.deleteRecursively(tmpDir) } } + + private def updateConfWithFakeS3Fs(conf: Configuration): Unit = { + conf.set("fs.s3a.impl", classOf[TestFileSystem].getCanonicalName) + conf.set("fs.s3a.impl.disable.cache", "true") + } } object JarCreationTest extends Logging { @@ -897,4 +930,13 @@ class TestFileSystem extends org.apache.hadoop.fs.LocalFileSystem { // Ignore the scheme for testing. 
super.copyToLocalFile(new Path(src.toUri.getPath), dst) } + + override def globStatus(pathPattern: Path): Array[FileStatus] = { + val newPath = new Path(pathPattern.toUri.getPath) + super.globStatus(newPath).map { status => + val path = s"s3a://${status.getPath.toUri.getPath}" + status.setPath(new Path(path)) + status + } + } } diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index b7237a6ce822f..004348c5eb3c9 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -1066,7 +1066,7 @@ class SparkILoop( logWarning("ADD_JARS environment variable is deprecated, use --jar spark submit argument instead") } val jars = { - val userJars = Utils.getUserJars(conf, isShell = true) + val userJars = Utils.getLocalUserJarsForShell(conf) if (userJars.isEmpty) { envJars.getOrElse("") } else { diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala index 9702a1e653c32..0b16e1b073e32 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala @@ -57,7 +57,7 @@ object Main extends Logging { // Visible for testing private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = { interp = _interp - val jars = Utils.getUserJars(conf, isShell = true) + val jars = Utils.getLocalUserJarsForShell(conf) // Remove file:///, file:// or file:/ scheme if exists for each jar .map { x => if (x.startsWith("file:")) new File(new URI(x)).getPath else x } .mkString(File.pathSeparator) From 14054ffc5fd3399d04d69e26efb31d8b24b60bdc Mon Sep 17 00:00:00 2001 From: Sital Kedia Date: Wed, 30 Aug 2017 14:19:13 -0700 Subject: [PATCH 1162/1204] [SPARK-21834] Incorrect executor request in case of dynamic allocation ## What changes were proposed in this pull request? killExecutor api currently does not allow killing an executor without updating the total number of executors needed. In case of dynamic allocation is turned on and the allocator tries to kill an executor, the scheduler reduces the total number of executors needed ( see https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala#L635) which is incorrect because the allocator already takes care of setting the required number of executors itself. ## How was this patch tested? Ran a job on the cluster and made sure the executor request is correct Author: Sital Kedia Closes #19081 from sitalkedia/skedia/oss_fix_executor_allocation. (cherry picked from commit 6949a9c5c6120fdde1b63876ede661adbd1eb15e) Signed-off-by: Marcelo Vanzin --- .../scala/org/apache/spark/ExecutorAllocationManager.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index bb5eb7f8a9e01..632d5f260b737 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -430,6 +430,9 @@ private[spark] class ExecutorAllocationManager( } else { client.killExecutors(executorIdsToBeRemoved) } + // [SPARK-21834] killExecutors api reduces the target number of executors. 
+ // So we need to update the target with desired value. + client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount) // reset the newExecutorTotal to the existing number of executors newExecutorTotal = numExistingExecutors if (testing || executorsRemoved.nonEmpty) { From 50f86e1fe2aad67e4472b24d910ea519b9ad746f Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 1 Sep 2017 13:48:50 -0700 Subject: [PATCH 1163/1204] [SPARK-21884][SPARK-21477][BACKPORT-2.2][SQL] Mark LocalTableScanExec's input data transient This PR is to backport https://github.com/apache/spark/pull/18686 for resolving the issue in https://github.com/apache/spark/pull/19094 --- ## What changes were proposed in this pull request? This PR is to mark the parameter `rows` and `unsafeRow` of LocalTableScanExec transient. It can avoid serializing the unneeded objects. ## How was this patch tested? N/A Author: gatorsmile Closes #19101 from gatorsmile/backport-21477. --- .../apache/spark/sql/execution/LocalTableScanExec.scala | 4 ++-- .../sql/execution/OptimizeMetadataOnlyQuerySuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index 19c68c13262a5..514ad7018d8c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -28,12 +28,12 @@ import org.apache.spark.sql.execution.metric.SQLMetrics */ case class LocalTableScanExec( output: Seq[Attribute], - rows: Seq[InternalRow]) extends LeafExecNode { + @transient rows: Seq[InternalRow]) extends LeafExecNode { override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) - private lazy val unsafeRows: Array[InternalRow] = { + @transient private lazy val unsafeRows: Array[InternalRow] = { if (rows.isEmpty) { Array.empty } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala index 58c310596ca6d..6c66902127d03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala @@ -117,4 +117,12 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext { "select partcol1, max(partcol2) from srcpart where partcol1 = 0 group by rollup (partcol1)", "select partcol2 from (select partcol2 from srcpart where partcol1 = 0 union all " + "select partcol2 from srcpart where partcol1 = 1) t group by partcol2") + + test("SPARK-21884 Fix StackOverflowError on MetadataOnlyQuery") { + withTable("t_1000") { + sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)") + (1 to 1000).foreach(p => sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)")) + sql("SELECT COUNT(DISTINCT p) FROM t_1000").collect() + } + } } From fb1b5f08adaf4ec7c786b7a8b6283b62683f1324 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 4 Sep 2017 23:02:59 +0200 Subject: [PATCH 1164/1204] [SPARK-21418][SQL] NoSuchElementException: None.get in DataSourceScanExec with sun.io.serialization.extendedDebugInfo=true ## What changes were proposed in this pull request? 
If no SparkConf is available to Utils.redact, simply don't redact. ## How was this patch tested? Existing tests Author: Sean Owen Closes #19123 from srowen/SPARK-21418. (cherry picked from commit ca59445adb30ed796189532df2a2898ecd33db68) Signed-off-by: Herman van Hovell --- core/src/main/scala/org/apache/spark/util/Utils.scala | 9 ++++++--- .../apache/spark/sql/execution/DataSourceScanExec.scala | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 69c6c3327ee24..a5f44bd746f14 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2614,9 +2614,12 @@ private[spark] object Utils extends Logging { * Redact the sensitive information in the given string. */ def redact(conf: SparkConf, text: String): String = { - if (text == null || text.isEmpty || !conf.contains(STRING_REDACTION_PATTERN)) return text - val regex = conf.get(STRING_REDACTION_PATTERN).get - regex.replaceAllIn(text, REDACTION_REPLACEMENT_TEXT) + if (text == null || text.isEmpty || conf == null || !conf.contains(STRING_REDACTION_PATTERN)) { + text + } else { + val regex = conf.get(STRING_REDACTION_PATTERN).get + regex.replaceAllIn(text, REDACTION_REPLACEMENT_TEXT) + } } private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 74fc23a52a141..6fb41b6425c4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -67,7 +67,7 @@ trait DataSourceScanExec extends LeafExecNode with CodegenSupport { * Shorthand for calling redactString() without specifying redacting rules */ private def redact(text: String): String = { - Utils.redact(SparkSession.getActiveSession.get.sparkContext.conf, text) + Utils.redact(SparkSession.getActiveSession.map(_.sparkContext.conf).orNull, text) } } From 1f7c4869b811f9a05cd1fb54e168e739cde7933f Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 5 Sep 2017 13:10:32 -0700 Subject: [PATCH 1165/1204] [SPARK-21925] Update trigger interval documentation in docs with behavior change in Spark 2.2 Forgot to update docs with behavior change. Author: Burak Yavuz Closes #19138 from brkyvz/trigger-doc-fix. (cherry picked from commit 8c954d2cd10a2cf729d2971fbeb19b2dd751a178) Signed-off-by: Tathagata Das --- docs/structured-streaming-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 8367f5a08c755..13a6a8297b922 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1168,7 +1168,7 @@ returned through `Dataset.writeStream()`. You will have to specify one or more o - *Query name:* Optionally, specify a unique name of the query for identification. -- *Trigger interval:* Optionally, specify the trigger interval. If it is not specified, the system will check for availability of new data as soon as the previous processing has completed. 
If a trigger time is missed because the previous processing has not completed, then the system will attempt to trigger at the next trigger point, not immediately after the processing has completed. +- *Trigger interval:* Optionally, specify the trigger interval. If it is not specified, the system will check for availability of new data as soon as the previous processing has completed. If a trigger time is missed because the previous processing has not completed, then the system will trigger processing immediately. - *Checkpoint location:* For some output sinks where the end-to-end fault-tolerance can be guaranteed, specify the location where the system will write all the checkpoint information. This should be a directory in an HDFS-compatible fault-tolerant file system. The semantics of checkpointing is discussed in more detail in the next section. From 7da8fbf08b492ae899bef5ea5a08e2bcf4c6db93 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 5 Sep 2017 14:35:09 -0700 Subject: [PATCH 1166/1204] [MINOR][DOC] Update `Partition Discovery` section to enumerate all available file sources ## What changes were proposed in this pull request? All built-in data sources support `Partition Discovery`. We had better update the document to give the users more benefit clearly. **AFTER** 1 ## How was this patch tested? ``` SKIP_API=1 jekyll serve --watch ``` Author: Dongjoon Hyun Closes #19139 from dongjoon-hyun/partitiondiscovery. (cherry picked from commit 9e451bcf36151bf401f72dcd66001b9ceb079738) Signed-off-by: gatorsmile --- docs/sql-programming-guide.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index b5eca76480eb8..9a54adca31e2d 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -733,8 +733,9 @@ SELECT * FROM parquetTable Table partitioning is a common optimization approach used in systems like Hive. In a partitioned table, data are usually stored in different directories, with partitioning column values encoded in -the path of each partition directory. The Parquet data source is now able to discover and infer -partitioning information automatically. For example, we can store all our previously used +the path of each partition directory. All built-in file sources (including Text/CSV/JSON/ORC/Parquet) +are able to discover and infer partitioning information automatically. +For example, we can store all our previously used population data into a partitioned table using the following directory structure, with two extra columns, `gender` and `country` as partitioning columns: From 9afab9a524c287a5c87c0ff54e5c1b757b32747c Mon Sep 17 00:00:00 2001 From: Riccardo Corbella Date: Wed, 6 Sep 2017 08:22:57 +0100 Subject: [PATCH 1167/1204] [SPARK-21924][DOCS] Update structured streaming programming guide doc ## What changes were proposed in this pull request? Update the line "For example, the data (12:09, cat) is out of order and late, and it falls in windows 12:05 - 12:15 and 12:10 - 12:20." as follow "For example, the data (12:09, cat) is out of order and late, and it falls in windows 12:00 - 12:10 and 12:05 - 12:15." under the programming structured streaming programming guide. Author: Riccardo Corbella Closes #19137 from riccardocorbella/bugfix. 
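For illustration, an editorial sketch (separate from the doc diff below) of the windowed aggregation the corrected sentence describes. It assumes a streaming Dataset `events` with columns `timestamp` and `word`, 10-minute windows sliding every 5 minutes, and a 10-minute watermark; with these settings a record at 12:09 is counted in both the 12:00 - 12:10 and 12:05 - 12:15 windows.

```
import org.apache.spark.sql.functions.{col, window}

// Sketch only: `events` is assumed to be a streaming Dataset with an event-time
// column `timestamp` and a `word` column (e.g. built via spark.readStream...).
val windowedCounts = events
  .withWatermark("timestamp", "10 minutes")   // tolerate data up to 10 minutes late
  .groupBy(
    window(col("timestamp"), "10 minutes", "5 minutes"),  // overlapping sliding windows
    col("word"))
  .count()   // a late record at 12:09 updates the 12:00-12:10 and 12:05-12:15 windows
```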
(cherry picked from commit 4ee7dfe41b27abbd4c32074ecc8f268f6193c3f4) Signed-off-by: Sean Owen --- docs/structured-streaming-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 13a6a8297b922..93bef8d5bb7e2 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -977,7 +977,7 @@ at the beginning of every trigger is the red line For example, when the engine `(12:14, dog)`, it sets the watermark for the next trigger as `12:04`. This watermark lets the engine maintain intermediate state for additional 10 minutes to allow late data to be counted. For example, the data `(12:09, cat)` is out of order and late, and it falls in -windows `12:05 - 12:15` and `12:10 - 12:20`. Since, it is still ahead of the watermark `12:04` in +windows `12:00 - 12:10` and `12:05 - 12:15`. Since, it is still ahead of the watermark `12:04` in the trigger, the engine still maintains the intermediate counts as state and correctly updates the counts of the related windows. However, when the watermark is updated to `12:11`, the intermediate state for window `(12:00 - 12:10)` is cleared, and all subsequent data (e.g. `(12:04, donkey)`) From 342cc2a4cad4b8491f4689b66570d14e5fcba33b Mon Sep 17 00:00:00 2001 From: Jacek Laskowski Date: Wed, 6 Sep 2017 15:48:48 -0700 Subject: [PATCH 1168/1204] [SPARK-21901][SS] Define toString for StateOperatorProgress ## What changes were proposed in this pull request? Just `StateOperatorProgress.toString` + few formatting fixes ## How was this patch tested? Local build. Waiting for OK from Jenkins. Author: Jacek Laskowski Closes #19112 from jaceklaskowski/SPARK-21901-StateOperatorProgress-toString. (cherry picked from commit fa0092bddf695a757f5ddaed539e55e2dc9fccb7) Signed-off-by: Shixiong Zhu --- .../main/scala/org/apache/spark/sql/streaming/progress.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index fb590e7df996b..5171852c48b9e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -49,6 +49,8 @@ class StateOperatorProgress private[sql]( ("numRowsTotal" -> JInt(numRowsTotal)) ~ ("numRowsUpdated" -> JInt(numRowsUpdated)) } + + override def toString: String = prettyJson } /** From 49968de526e76a75abafb636cbd5ed84f9a496e9 Mon Sep 17 00:00:00 2001 From: Tucker Beck Date: Thu, 7 Sep 2017 09:38:00 +0900 Subject: [PATCH 1169/1204] Fixed pandoc dependency issue in python/setup.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Description When pyspark is listed as a dependency of another package, installing the other package will cause an install failure in pyspark. When the other package is being installed, pyspark's setup_requires requirements are installed including pypandoc. Thus, the exception handling on setup.py:152 does not work because the pypandoc module is indeed available. However, the pypandoc.convert() function fails if pandoc itself is not installed (in our use cases it is not). This raises an OSError that is not handled, and setup fails. 
The following is a sample failure: ``` $ which pandoc $ pip freeze | grep pypandoc pypandoc==1.4 $ pip install pyspark Collecting pyspark Downloading pyspark-2.2.0.post0.tar.gz (188.3MB) 100% |████████████████████████████████| 188.3MB 16.8MB/s Complete output from command python setup.py egg_info: Maybe try: sudo apt-get install pandoc See http://johnmacfarlane.net/pandoc/installing.html for installation options --------------------------------------------------------------- Traceback (most recent call last): File "", line 1, in File "/tmp/pip-build-mfnizcwa/pyspark/setup.py", line 151, in long_description = pypandoc.convert('README.md', 'rst') File "/home/tbeck/.virtualenvs/cem/lib/python3.5/site-packages/pypandoc/__init__.py", line 69, in convert outputfile=outputfile, filters=filters) File "/home/tbeck/.virtualenvs/cem/lib/python3.5/site-packages/pypandoc/__init__.py", line 260, in _convert_input _ensure_pandoc_path() File "/home/tbeck/.virtualenvs/cem/lib/python3.5/site-packages/pypandoc/__init__.py", line 544, in _ensure_pandoc_path raise OSError("No pandoc was found: either install pandoc and add it\n" OSError: No pandoc was found: either install pandoc and add it to your PATH or or call pypandoc.download_pandoc(...) or install pypandoc wheels with included pandoc. ---------------------------------------- Command "python setup.py egg_info" failed with error code 1 in /tmp/pip-build-mfnizcwa/pyspark/ ``` ## What changes were proposed in this pull request? This change simply adds an additional exception handler for the OSError that is raised. This allows pyspark to be installed client-side without requiring pandoc to be installed. ## How was this patch tested? I tested this by building a wheel package of pyspark with the change applied. Then, in a clean virtual environment with pypandoc installed but pandoc not available on the system, I installed pyspark from the wheel. Here is the output ``` $ pip freeze | grep pypandoc pypandoc==1.4 $ which pandoc $ pip install --no-cache-dir ../spark/python/dist/pyspark-2.3.0.dev0-py2.py3-none-any.whl Processing /home/tbeck/work/spark/python/dist/pyspark-2.3.0.dev0-py2.py3-none-any.whl Requirement already satisfied: py4j==0.10.6 in /home/tbeck/.virtualenvs/cem/lib/python3.5/site-packages (from pyspark==2.3.0.dev0) Installing collected packages: pyspark Successfully installed pyspark-2.3.0.dev0 ``` Author: Tucker Beck Closes #18981 from dusktreader/dusktreader/fix-pandoc-dependency-issue-in-setup_py. (cherry picked from commit aad2125475dcdeb4a0410392b6706511db17bac4) Signed-off-by: hyukjinkwon --- python/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/setup.py b/python/setup.py index f50035435e26b..7e63461d289b2 100644 --- a/python/setup.py +++ b/python/setup.py @@ -151,6 +151,8 @@ def _supports_symlinks(): long_description = pypandoc.convert('README.md', 'rst') except ImportError: print("Could not import pypandoc - required to package PySpark", file=sys.stderr) + except OSError: + print("Could not convert - pandoc is not installed", file=sys.stderr) setup( name='pyspark', From 0848df1bb6f27fc7182e0e52efeef1407fd532d2 Mon Sep 17 00:00:00 2001 From: Sanket Chintapalli Date: Thu, 7 Sep 2017 10:20:39 -0700 Subject: [PATCH 1170/1204] [SPARK-21890] Credentials not being passed to add the tokens ## What changes were proposed in this pull request? I observed this while running a oozie job trying to connect to hbase via spark. 
It look like the creds are not being passed in thehttps://github.com/apache/spark/blob/branch-2.2/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala#L53 for 2.2 release. More Info as to why it fails on secure grid: Oozie client gets the necessary tokens the application needs before launching. It passes those tokens along to the oozie launcher job (MR job) which will then actually call the Spark client to launch the spark app and pass the tokens along. The oozie launcher job cannot get anymore tokens because all it has is tokens ( you can't get tokens with tokens, you need tgt or keytab). The error here is because the launcher job runs the Spark Client to submit the spark job but the spark client doesn't see that it already has the hdfs tokens so it tries to get more, which ends with the exception. There was a change with SPARK-19021 to generalize the hdfs credentials provider that changed it so we don't pass the existing credentials into the call to get tokens so it doesn't realize it already has the necessary tokens. https://issues.apache.org/jira/browse/SPARK-21890 Modified to pass creds to get delegation tokens ## How was this patch tested? Manual testing on our secure cluster Author: Sanket Chintapalli Closes #19103 from redsanket/SPARK-21890. --- .../deploy/yarn/security/HadoopFSCredentialProvider.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala index f65c886db944e..19ed026f07137 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/security/HadoopFSCredentialProvider.scala @@ -45,12 +45,11 @@ private[security] class HadoopFSCredentialProvider sparkConf: SparkConf, creds: Credentials): Option[Long] = { // NameNode to access, used to get tokens from different FileSystems - val tmpCreds = new Credentials() val tokenRenewer = getTokenRenewer(hadoopConf) hadoopFSsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) logInfo("getting token for: " + dst) - dstFs.addDelegationTokens(tokenRenewer, tmpCreds) + dstFs.addDelegationTokens(tokenRenewer, creds) } // Get the token renewal interval if it is not set. It will only be called once. @@ -60,7 +59,7 @@ private[security] class HadoopFSCredentialProvider // Get the time of next renewal. val nextRenewalDate = tokenRenewalInterval.flatMap { interval => - val nextRenewalDates = tmpCreds.getAllTokens.asScala + val nextRenewalDates = creds.getAllTokens.asScala .filter(_.decodeIdentifier().isInstanceOf[AbstractDelegationTokenIdentifier]) .map { t => val identifier = t.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier] @@ -69,7 +68,6 @@ private[security] class HadoopFSCredentialProvider if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min) } - creds.addAll(tmpCreds) nextRenewalDate } From 4304d0bf05eb51c13ae1b9ee9a2970a945b51cac Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 8 Sep 2017 14:26:07 +0900 Subject: [PATCH 1171/1204] [SPARK-21950][SQL][PYTHON][TEST] pyspark.sql.tests.SQLTests2 should stop SparkContext. ## What changes were proposed in this pull request? 
`pyspark.sql.tests.SQLTests2` doesn't stop newly created spark context in the test and it might affect the following tests. This pr makes `pyspark.sql.tests.SQLTests2` stop `SparkContext`. ## How was this patch tested? Existing tests. Author: Takuya UESHIN Closes #19158 from ueshin/issues/SPARK-21950. (cherry picked from commit 57bc1e9eb452284cbed090dbd5008eb2062f1b36) Signed-off-by: Takuya UESHIN --- python/pyspark/sql/tests.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 20d9ca22d6850..a100dc07a161e 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -2252,8 +2252,12 @@ def test_sparksession_with_stopped_sparkcontext(self): self.sc.stop() sc = SparkContext('local[4]', self.sc.appName) spark = SparkSession.builder.getOrCreate() - df = spark.createDataFrame([(1, 2)], ["c", "c"]) - df.collect() + try: + df = spark.createDataFrame([(1, 2)], ["c", "c"]) + df.collect() + finally: + spark.stop() + sc.stop() class UDFInitializationTests(unittest.TestCase): From 781a1f83c538a80ce1f1876e4786b02cb7984e16 Mon Sep 17 00:00:00 2001 From: "MarkTab marktab.net" Date: Fri, 8 Sep 2017 08:08:09 +0100 Subject: [PATCH 1172/1204] [SPARK-21915][ML][PYSPARK] Model 1 and Model 2 ParamMaps Missing dongjoon-hyun HyukjinKwon Error in PySpark example code: /examples/src/main/python/ml/estimator_transformer_param_example.py The original Scala code says println("Model 2 was fit using parameters: " + model2.parent.extractParamMap) The parent is lr There is no method for accessing parent as is done in Scala. This code has been tested in Python, and returns values consistent with Scala ## What changes were proposed in this pull request? Proposing to call the lr variable instead of model1 or model2 ## How was this patch tested? This patch was tested with Spark 2.1.0 comparing the Scala and PySpark results. Pyspark returns nothing at present for those two print lines. The output for model2 in PySpark should be {Param(parent='LogisticRegression_4187be538f744d5a9090', name='tol', doc='the convergence tolerance for iterative algorithms (>= 0).'): 1e-06, Param(parent='LogisticRegression_4187be538f744d5a9090', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0, Param(parent='LogisticRegression_4187be538f744d5a9090', name='predictionCol', doc='prediction column name.'): 'prediction', Param(parent='LogisticRegression_4187be538f744d5a9090', name='featuresCol', doc='features column name.'): 'features', Param(parent='LogisticRegression_4187be538f744d5a9090', name='labelCol', doc='label column name.'): 'label', Param(parent='LogisticRegression_4187be538f744d5a9090', name='probabilityCol', doc='Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.'): 'myProbability', Param(parent='LogisticRegression_4187be538f744d5a9090', name='rawPredictionCol', doc='raw prediction (a.k.a. confidence) column name.'): 'rawPrediction', Param(parent='LogisticRegression_4187be538f744d5a9090', name='family', doc='The name of family which is a description of the label distribution to be used in the model. 
Supported options: auto, binomial, multinomial'): 'auto', Param(parent='LogisticRegression_4187be538f744d5a9090', name='fitIntercept', doc='whether to fit an intercept term.'): True, Param(parent='LogisticRegression_4187be538f744d5a9090', name='threshold', doc='Threshold in binary classification prediction, in range [0, 1]. If threshold and thresholds are both set, they must match.e.g. if threshold is p, then thresholds must be equal to [1-p, p].'): 0.55, Param(parent='LogisticRegression_4187be538f744d5a9090', name='aggregationDepth', doc='suggested depth for treeAggregate (>= 2).'): 2, Param(parent='LogisticRegression_4187be538f744d5a9090', name='maxIter', doc='max number of iterations (>= 0).'): 30, Param(parent='LogisticRegression_4187be538f744d5a9090', name='regParam', doc='regularization parameter (>= 0).'): 0.1, Param(parent='LogisticRegression_4187be538f744d5a9090', name='standardization', doc='whether to standardize the training features before fitting the model.'): True} Please review http://spark.apache.org/contributing.html before opening a pull request. Author: MarkTab marktab.net Closes #19152 from marktab/branch-2.2. --- .../src/main/python/ml/estimator_transformer_param_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py index eb21051435393..929bd2595fe4a 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -53,7 +53,7 @@ # This prints the parameter (name: value) pairs, where names are unique IDs for this # LogisticRegression instance. print("Model 1 was fit using parameters: ") - print(model1.extractParamMap()) + print(lr.extractParamMap()) # We may alternatively specify parameters using a Python dictionary as a paramMap paramMap = {lr.maxIter: 20} @@ -69,7 +69,7 @@ # paramMapCombined overrides all parameters set earlier via lr.set* methods. model2 = lr.fit(training, paramMapCombined) print("Model 2 was fit using parameters: ") - print(model2.extractParamMap()) + print(lr.extractParamMap(extra=paramMapCombined)) # Prepare test data test = spark.createDataFrame([ From 08cb06af20f87d40b78b521f82774cf1b6f9c80a Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 8 Sep 2017 09:35:41 -0700 Subject: [PATCH 1173/1204] [SPARK-21936][SQL][2.2] backward compatibility test framework for HiveExternalCatalog backport https://github.com/apache/spark/pull/19148 to 2.2 Author: Wenchen Fan Closes #19163 from cloud-fan/test. 
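For context, a condensed editorial sketch (separate from the diff that follows) of the property the new suite protects: tables written by an older Spark release against the same warehouse and metastore must remain readable, writable, and renamable from the current build. The table name `data_source_tbl_0` matches the tables the suite itself creates with downloaded older Spark versions.

```
import org.apache.spark.sql.SparkSession

// Sketch only: assumes an older Spark release already created `data_source_tbl_0`
// (e.g. `CREATE TABLE data_source_tbl_0 USING json AS SELECT 1 i`) in the same
// Hive metastore and warehouse directory that this session points at.
val spark = SparkSession.builder().enableHiveSupport().getOrCreate()

spark.sql("INSERT INTO data_source_tbl_0 SELECT 2")                              // still writable
assert(spark.sql("SELECT i FROM data_source_tbl_0 WHERE i > 1").count() == 1L)   // still readable
spark.sql("ALTER TABLE data_source_tbl_0 RENAME TO data_source_tbl_0_renamed")   // still renamable
```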
--- sql/hive/pom.xml | 4 + ...nalCatalogBackwardCompatibilitySuite.scala | 264 ------------------ .../HiveExternalCatalogVersionsSuite.scala | 194 +++++++++++++ .../spark/sql/hive/HiveSparkSubmitSuite.scala | 77 +---- .../sql/hive/MetastoreDataSourcesSuite.scala | 27 -- .../spark/sql/hive/SparkSubmitTestUtils.scala | 101 +++++++ 6 files changed, 301 insertions(+), 366 deletions(-) delete mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/SparkSubmitTestUtils.scala diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 3dca866307232..616f7cd2bc490 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,6 +162,10 @@ org.apache.thrift libfb303 + + org.apache.derby + derby + org.scalacheck scalacheck_${scala.binary.version} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala deleted file mode 100644 index 705d43f1f3aba..0000000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import java.net.URI - -import org.apache.hadoop.fs.Path -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.hive.client.HiveClient -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils - - -class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest - with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { - - // To test `HiveExternalCatalog`, we need to read/write the raw table meta from/to hive client. 
- val hiveClient: HiveClient = - spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client - - val tempDir = Utils.createTempDir().getCanonicalFile - val tempDirUri = tempDir.toURI - val tempDirStr = tempDir.getAbsolutePath - - override def beforeEach(): Unit = { - sql("CREATE DATABASE test_db") - for ((tbl, _) <- rawTablesAndExpectations) { - hiveClient.createTable(tbl, ignoreIfExists = false) - } - } - - override def afterEach(): Unit = { - Utils.deleteRecursively(tempDir) - hiveClient.dropDatabase("test_db", ignoreIfNotExists = false, cascade = true) - } - - private def getTableMetadata(tableName: String): CatalogTable = { - spark.sharedState.externalCatalog.getTable("test_db", tableName) - } - - private def defaultTableURI(tableName: String): URI = { - spark.sessionState.catalog.defaultTablePath(TableIdentifier(tableName, Some("test_db"))) - } - - // Raw table metadata that are dumped from tables created by Spark 2.0. Note that, all spark - // versions prior to 2.1 would generate almost same raw table metadata for a specific table. - val simpleSchema = new StructType().add("i", "int") - val partitionedSchema = new StructType().add("i", "int").add("j", "int") - - lazy val hiveTable = CatalogTable( - identifier = TableIdentifier("tbl1", Some("test_db")), - tableType = CatalogTableType.MANAGED, - storage = CatalogStorageFormat.empty.copy( - inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), - outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), - schema = simpleSchema) - - lazy val externalHiveTable = CatalogTable( - identifier = TableIdentifier("tbl2", Some("test_db")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - locationUri = Some(tempDirUri), - inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), - outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), - schema = simpleSchema) - - lazy val partitionedHiveTable = CatalogTable( - identifier = TableIdentifier("tbl3", Some("test_db")), - tableType = CatalogTableType.MANAGED, - storage = CatalogStorageFormat.empty.copy( - inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"), - outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")), - schema = partitionedSchema, - partitionColumnNames = Seq("j")) - - - val simpleSchemaJson = - """ - |{ - | "type": "struct", - | "fields": [{ - | "name": "i", - | "type": "integer", - | "nullable": true, - | "metadata": {} - | }] - |} - """.stripMargin - - val partitionedSchemaJson = - """ - |{ - | "type": "struct", - | "fields": [{ - | "name": "i", - | "type": "integer", - | "nullable": true, - | "metadata": {} - | }, - | { - | "name": "j", - | "type": "integer", - | "nullable": true, - | "metadata": {} - | }] - |} - """.stripMargin - - lazy val dataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl4", Some("test_db")), - tableType = CatalogTableType.MANAGED, - storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> defaultTableURI("tbl4").toString)), - schema = new StructType(), - provider = Some("json"), - properties = Map( - "spark.sql.sources.provider" -> "json", - "spark.sql.sources.schema.numParts" -> "1", - "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - - lazy val hiveCompatibleDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl5", Some("test_db")), - tableType = CatalogTableType.MANAGED, - storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> 
defaultTableURI("tbl5").toString)), - schema = simpleSchema, - provider = Some("parquet"), - properties = Map( - "spark.sql.sources.provider" -> "parquet", - "spark.sql.sources.schema.numParts" -> "1", - "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - - lazy val partitionedDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl6", Some("test_db")), - tableType = CatalogTableType.MANAGED, - storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> defaultTableURI("tbl6").toString)), - schema = new StructType(), - provider = Some("json"), - properties = Map( - "spark.sql.sources.provider" -> "json", - "spark.sql.sources.schema.numParts" -> "1", - "spark.sql.sources.schema.part.0" -> partitionedSchemaJson, - "spark.sql.sources.schema.numPartCols" -> "1", - "spark.sql.sources.schema.partCol.0" -> "j")) - - lazy val externalDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl7", Some("test_db")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - locationUri = Some(new URI(defaultTableURI("tbl7") + "-__PLACEHOLDER__")), - properties = Map("path" -> tempDirStr)), - schema = new StructType(), - provider = Some("json"), - properties = Map( - "spark.sql.sources.provider" -> "json", - "spark.sql.sources.schema.numParts" -> "1", - "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - - lazy val hiveCompatibleExternalDataSourceTable = CatalogTable( - identifier = TableIdentifier("tbl8", Some("test_db")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - locationUri = Some(tempDirUri), - properties = Map("path" -> tempDirStr)), - schema = simpleSchema, - properties = Map( - "spark.sql.sources.provider" -> "parquet", - "spark.sql.sources.schema.numParts" -> "1", - "spark.sql.sources.schema.part.0" -> simpleSchemaJson)) - - lazy val dataSourceTableWithoutSchema = CatalogTable( - identifier = TableIdentifier("tbl9", Some("test_db")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - locationUri = Some(new URI(defaultTableURI("tbl9") + "-__PLACEHOLDER__")), - properties = Map("path" -> tempDirStr)), - schema = new StructType(), - provider = Some("json"), - properties = Map("spark.sql.sources.provider" -> "json")) - - // A list of all raw tables we want to test, with their expected schema. 
- lazy val rawTablesAndExpectations = Seq( - hiveTable -> simpleSchema, - externalHiveTable -> simpleSchema, - partitionedHiveTable -> partitionedSchema, - dataSourceTable -> simpleSchema, - hiveCompatibleDataSourceTable -> simpleSchema, - partitionedDataSourceTable -> partitionedSchema, - externalDataSourceTable -> simpleSchema, - hiveCompatibleExternalDataSourceTable -> simpleSchema, - dataSourceTableWithoutSchema -> new StructType()) - - test("make sure we can read table created by old version of Spark") { - for ((tbl, expectedSchema) <- rawTablesAndExpectations) { - val readBack = getTableMetadata(tbl.identifier.table) - assert(readBack.schema.sameType(expectedSchema)) - - if (tbl.tableType == CatalogTableType.EXTERNAL) { - // trim the URI prefix - val tableLocation = readBack.storage.locationUri.get.getPath - val expectedLocation = tempDir.toURI.getPath.stripSuffix("/") - assert(tableLocation == expectedLocation) - } - } - } - - test("make sure we can alter table location created by old version of Spark") { - withTempDir { dir => - for ((tbl, _) <- rawTablesAndExpectations if tbl.tableType == CatalogTableType.EXTERNAL) { - val path = dir.toURI.toString.stripSuffix("/") - sql(s"ALTER TABLE ${tbl.identifier} SET LOCATION '$path'") - - val readBack = getTableMetadata(tbl.identifier.table) - - // trim the URI prefix - val actualTableLocation = readBack.storage.locationUri.get.getPath - val expected = dir.toURI.getPath.stripSuffix("/") - assert(actualTableLocation == expected) - } - } - } - - test("make sure we can rename table created by old version of Spark") { - for ((tbl, expectedSchema) <- rawTablesAndExpectations) { - val newName = tbl.identifier.table + "_renamed" - sql(s"ALTER TABLE ${tbl.identifier} RENAME TO $newName") - - val readBack = getTableMetadata(newName) - assert(readBack.schema.sameType(expectedSchema)) - - // trim the URI prefix - val actualTableLocation = readBack.storage.locationUri.get.getPath - val expectedLocation = if (tbl.tableType == CatalogTableType.EXTERNAL) { - tempDir.toURI.getPath.stripSuffix("/") - } else { - // trim the URI prefix - defaultTableURI(newName).getPath - } - assert(actualTableLocation == expectedLocation) - } - } -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala new file mode 100644 index 0000000000000..2928a734a7e36 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import java.io.File +import java.nio.file.Files + +import org.apache.spark.TestUtils +import org.apache.spark.sql.{QueryTest, Row, SparkSession} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTableType +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.util.Utils + +/** + * Test HiveExternalCatalog backward compatibility. + * + * Note that, this test suite will automatically download spark binary packages of different + * versions to a local directory `/tmp/spark-test`. If there is already a spark folder with + * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the + * downloading for this spark version. + */ +class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { + private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") + private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") + private val sparkTestingDir = "/tmp/spark-test" + private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + + override def afterAll(): Unit = { + Utils.deleteRecursively(wareHousePath) + Utils.deleteRecursively(tmpDataDir) + super.afterAll() + } + + private def downloadSpark(version: String): Unit = { + import scala.sys.process._ + + val url = s"https://d3kbcqa49mib13.cloudfront.net/spark-$version-bin-hadoop2.7.tgz" + + Seq("wget", url, "-q", "-P", sparkTestingDir).! + + val downloaded = new File(sparkTestingDir, s"spark-$version-bin-hadoop2.7.tgz").getCanonicalPath + val targetDir = new File(sparkTestingDir, s"spark-$version").getCanonicalPath + + Seq("mkdir", targetDir).! + + Seq("tar", "-xzf", downloaded, "-C", targetDir, "--strip-components=1").! + + Seq("rm", downloaded).! 
+ } + + private def genDataDir(name: String): String = { + new File(tmpDataDir, name).getCanonicalPath + } + + override def beforeAll(): Unit = { + super.beforeAll() + + val tempPyFile = File.createTempFile("test", ".py") + Files.write(tempPyFile.toPath, + s""" + |from pyspark.sql import SparkSession + | + |spark = SparkSession.builder.enableHiveSupport().getOrCreate() + |version_index = spark.conf.get("spark.sql.test.version.index", None) + | + |spark.sql("create table data_source_tbl_{} using json as select 1 i".format(version_index)) + | + |spark.sql("create table hive_compatible_data_source_tbl_" + version_index + \\ + | " using parquet as select 1 i") + | + |json_file = "${genDataDir("json_")}" + str(version_index) + |spark.range(1, 2).selectExpr("cast(id as int) as i").write.json(json_file) + |spark.sql("create table external_data_source_tbl_" + version_index + \\ + | "(i int) using json options (path '{}')".format(json_file)) + | + |parquet_file = "${genDataDir("parquet_")}" + str(version_index) + |spark.range(1, 2).selectExpr("cast(id as int) as i").write.parquet(parquet_file) + |spark.sql("create table hive_compatible_external_data_source_tbl_" + version_index + \\ + | "(i int) using parquet options (path '{}')".format(parquet_file)) + | + |json_file2 = "${genDataDir("json2_")}" + str(version_index) + |spark.range(1, 2).selectExpr("cast(id as int) as i").write.json(json_file2) + |spark.sql("create table external_table_without_schema_" + version_index + \\ + | " using json options (path '{}')".format(json_file2)) + | + |spark.sql("create view v_{} as select 1 i".format(version_index)) + """.stripMargin.getBytes("utf8")) + + PROCESS_TABLES.testingVersions.zipWithIndex.foreach { case (version, index) => + val sparkHome = new File(sparkTestingDir, s"spark-$version") + if (!sparkHome.exists()) { + downloadSpark(version) + } + + val args = Seq( + "--name", "prepare testing tables", + "--master", "local[2]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}", + "--conf", s"spark.sql.test.version.index=$index", + "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", + tempPyFile.getCanonicalPath) + runSparkSubmit(args, Some(sparkHome.getCanonicalPath)) + } + + tempPyFile.delete() + } + + test("backward compatibility") { + val args = Seq( + "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), + "--name", "HiveExternalCatalog backward compatibility test", + "--master", "local[2]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--conf", s"spark.sql.warehouse.dir=${wareHousePath.getCanonicalPath}", + "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", + unusedJar.toString) + runSparkSubmit(args) + } +} + +object PROCESS_TABLES extends QueryTest with SQLTestUtils { + // Tests the latest version of every release line. 
+ val testingVersions = Seq("2.0.2", "2.1.1", "2.2.0") + + protected var spark: SparkSession = _ + + def main(args: Array[String]): Unit = { + val session = SparkSession.builder() + .enableHiveSupport() + .getOrCreate() + spark = session + + testingVersions.indices.foreach { index => + Seq( + s"data_source_tbl_$index", + s"hive_compatible_data_source_tbl_$index", + s"external_data_source_tbl_$index", + s"hive_compatible_external_data_source_tbl_$index", + s"external_table_without_schema_$index").foreach { tbl => + val tableMeta = spark.sharedState.externalCatalog.getTable("default", tbl) + + // make sure we can insert and query these tables. + session.sql(s"insert into $tbl select 2") + checkAnswer(session.sql(s"select * from $tbl"), Row(1) :: Row(2) :: Nil) + checkAnswer(session.sql(s"select i from $tbl where i > 1"), Row(2)) + + // make sure we can rename table. + val newName = tbl + "_renamed" + sql(s"ALTER TABLE $tbl RENAME TO $newName") + val readBack = spark.sharedState.externalCatalog.getTable("default", newName) + + val actualTableLocation = readBack.storage.locationUri.get.getPath + val expectedLocation = if (tableMeta.tableType == CatalogTableType.EXTERNAL) { + tableMeta.storage.locationUri.get.getPath + } else { + spark.sessionState.catalog.defaultTablePath(TableIdentifier(newName, None)).getPath + } + assert(actualTableLocation == expectedLocation) + + // make sure we can alter table location. + withTempDir { dir => + val path = dir.toURI.toString.stripSuffix("/") + sql(s"ALTER TABLE ${tbl}_renamed SET LOCATION '$path'") + val readBack = spark.sharedState.externalCatalog.getTable("default", tbl + "_renamed") + val actualTableLocation = readBack.storage.locationUri.get.getPath + val expected = dir.toURI.getPath.stripSuffix("/") + assert(actualTableLocation == expected) + } + } + + // test permanent view + checkAnswer(sql(s"select i from v_$index"), Row(1)) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 5f15a705a2e99..cf145c845eef0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -18,17 +18,11 @@ package org.apache.spark.sql.hive import java.io.{BufferedWriter, File, FileWriter} -import java.sql.Timestamp -import java.util.Date -import scala.collection.mutable.ArrayBuffer import scala.tools.nsc.Properties import org.apache.hadoop.fs.Path import org.scalatest.{BeforeAndAfterEach, Matchers} -import org.scalatest.concurrent.Timeouts -import org.scalatest.exceptions.TestFailedDueToTimeoutException -import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.internal.Logging @@ -38,7 +32,6 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} -import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.sql.types.{DecimalType, StructType} import org.apache.spark.util.{ResetSystemProperties, Utils} @@ -46,11 +39,10 @@ import org.apache.spark.util.{ResetSystemProperties, Utils} * This suite tests spark-submit with applications using HiveContext. 
*/ class HiveSparkSubmitSuite - extends SparkFunSuite + extends SparkSubmitTestUtils with Matchers with BeforeAndAfterEach - with ResetSystemProperties - with Timeouts { + with ResetSystemProperties { // TODO: rewrite these or mark them as slow tests to be run sparingly @@ -333,71 +325,6 @@ class HiveSparkSubmitSuite unusedJar.toString) runSparkSubmit(argsForShowTables) } - - // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. - // This is copied from org.apache.spark.deploy.SparkSubmitSuite - private def runSparkSubmit(args: Seq[String]): Unit = { - val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")) - val history = ArrayBuffer.empty[String] - val sparkSubmit = if (Utils.isWindows) { - // On Windows, `ProcessBuilder.directory` does not change the current working directory. - new File("..\\..\\bin\\spark-submit.cmd").getAbsolutePath - } else { - "./bin/spark-submit" - } - val commands = Seq(sparkSubmit) ++ args - val commandLine = commands.mkString("'", "' '", "'") - - val builder = new ProcessBuilder(commands: _*).directory(new File(sparkHome)) - val env = builder.environment() - env.put("SPARK_TESTING", "1") - env.put("SPARK_HOME", sparkHome) - - def captureOutput(source: String)(line: String): Unit = { - // This test suite has some weird behaviors when executed on Jenkins: - // - // 1. Sometimes it gets extremely slow out of unknown reason on Jenkins. Here we add a - // timestamp to provide more diagnosis information. - // 2. Log lines are not correctly redirected to unit-tests.log as expected, so here we print - // them out for debugging purposes. - val logLine = s"${new Timestamp(new Date().getTime)} - $source> $line" - // scalastyle:off println - println(logLine) - // scalastyle:on println - history += logLine - } - - val process = builder.start() - new ProcessOutputCapturer(process.getInputStream, captureOutput("stdout")).start() - new ProcessOutputCapturer(process.getErrorStream, captureOutput("stderr")).start() - - try { - val exitCode = failAfter(300.seconds) { process.waitFor() } - if (exitCode != 0) { - // include logs in output. Note that logging is async and may not have completed - // at the time this exception is raised - Thread.sleep(1000) - val historyLog = history.mkString("\n") - fail { - s"""spark-submit returned with exit code $exitCode. - |Command line: $commandLine - | - |$historyLog - """.stripMargin - } - } - } catch { - case to: TestFailedDueToTimeoutException => - val historyLog = history.mkString("\n") - fail(s"Timeout of $commandLine" + - s" See the log4j logs for more detail." 
+ - s"\n$historyLog", to) - case t: Throwable => throw t - } finally { - // Ensure we still kill the process in case it timed out - process.destroy() - } - } } object SetMetastoreURLTest extends Logging { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 06a30b726549e..07d641d72e709 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1359,31 +1359,4 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv sparkSession.sparkContext.conf.set(DEBUG_MODE, previousValue) } } - - test("SPARK-18464: support old table which doesn't store schema in table properties") { - withTable("old") { - withTempPath { path => - Seq(1 -> "a").toDF("i", "j").write.parquet(path.getAbsolutePath) - val tableDesc = CatalogTable( - identifier = TableIdentifier("old", Some("default")), - tableType = CatalogTableType.EXTERNAL, - storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> path.getAbsolutePath) - ), - schema = new StructType(), - provider = Some("parquet"), - properties = Map( - HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet")) - hiveClient.createTable(tableDesc, ignoreIfExists = false) - - checkAnswer(spark.table("old"), Row(1, "a")) - checkAnswer(sql("select * from old"), Row(1, "a")) - - val expectedSchema = StructType(Seq( - StructField("i", IntegerType, nullable = true), - StructField("j", StringType, nullable = true))) - assert(table("old").schema === expectedSchema) - } - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SparkSubmitTestUtils.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SparkSubmitTestUtils.scala new file mode 100644 index 0000000000000..4b28d4f362b80 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SparkSubmitTestUtils.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import java.io.File +import java.sql.Timestamp +import java.util.Date + +import scala.collection.mutable.ArrayBuffer + +import org.scalatest.concurrent.Timeouts +import org.scalatest.exceptions.TestFailedDueToTimeoutException +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer +import org.apache.spark.util.Utils + +trait SparkSubmitTestUtils extends SparkFunSuite with Timeouts { + + // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. 
+ // This is copied from org.apache.spark.deploy.SparkSubmitSuite + protected def runSparkSubmit(args: Seq[String], sparkHomeOpt: Option[String] = None): Unit = { + val sparkHome = sparkHomeOpt.getOrElse( + sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!"))) + val history = ArrayBuffer.empty[String] + val sparkSubmit = if (Utils.isWindows) { + // On Windows, `ProcessBuilder.directory` does not change the current working directory. + new File("..\\..\\bin\\spark-submit.cmd").getAbsolutePath + } else { + "./bin/spark-submit" + } + val commands = Seq(sparkSubmit) ++ args + val commandLine = commands.mkString("'", "' '", "'") + + val builder = new ProcessBuilder(commands: _*).directory(new File(sparkHome)) + val env = builder.environment() + env.put("SPARK_TESTING", "1") + env.put("SPARK_HOME", sparkHome) + + def captureOutput(source: String)(line: String): Unit = { + // This test suite has some weird behaviors when executed on Jenkins: + // + // 1. Sometimes it gets extremely slow out of unknown reason on Jenkins. Here we add a + // timestamp to provide more diagnosis information. + // 2. Log lines are not correctly redirected to unit-tests.log as expected, so here we print + // them out for debugging purposes. + val logLine = s"${new Timestamp(new Date().getTime)} - $source> $line" + // scalastyle:off println + println(logLine) + // scalastyle:on println + history += logLine + } + + val process = builder.start() + new ProcessOutputCapturer(process.getInputStream, captureOutput("stdout")).start() + new ProcessOutputCapturer(process.getErrorStream, captureOutput("stderr")).start() + + try { + val exitCode = failAfter(300.seconds) { process.waitFor() } + if (exitCode != 0) { + // include logs in output. Note that logging is async and may not have completed + // at the time this exception is raised + Thread.sleep(1000) + val historyLog = history.mkString("\n") + fail { + s"""spark-submit returned with exit code $exitCode. + |Command line: $commandLine + | + |$historyLog + """.stripMargin + } + } + } catch { + case to: TestFailedDueToTimeoutException => + val historyLog = history.mkString("\n") + fail(s"Timeout of $commandLine" + + s" See the log4j logs for more detail." + + s"\n$historyLog", to) + case t: Throwable => throw t + } finally { + // Ensure we still kill the process in case it timed out + process.destroy() + } + } +} From 9ae7c96ce33d3d67f49059b5b83ef1d9d3d8e8e5 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 8 Sep 2017 09:39:20 -0700 Subject: [PATCH 1174/1204] [SPARK-21946][TEST] fix flaky test: "alter table: rename cached table" in InMemoryCatalogedDDLSuite ## What changes were proposed in this pull request? This PR fixes flaky test `InMemoryCatalogedDDLSuite "alter table: rename cached table"`. Since this test validates distributed DataFrame, the result should be checked by using `checkAnswer`. The original version used `df.collect().Seq` method that does not guaranty an order of each element of the result. ## How was this patch tested? Use existing test case Author: Kazuaki Ishizaki Closes #19159 from kiszk/SPARK-21946. 
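For illustration, a minimal sketch of the two comparison styles discussed above (it assumes a QueryTest-based suite with `spark.implicits._` in scope; the DataFrames below are made up for the example and are not taken from DDLSuite):

```scala
val expected = Seq((1, "1"), (2, "2")).toDF("age", "name")
val actual = expected.repartition(4)   // collect() order is not guaranteed after a shuffle

// Flaky: Seq equality is element-by-element, so a reordered result fails the check even
// when the contents match.
// assert(actual.collect().toSeq == expected.collect().toSeq)

// Robust: checkAnswer (from QueryTest) normalizes row order before comparing.
checkAnswer(actual, expected)
```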
(cherry picked from commit 8a4f228dc0afed7992695486ecab6bc522f1e392) Signed-off-by: gatorsmile --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 56d2937ccc6ff..5109c649f4318 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -758,7 +758,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val df = (1 to 2).map { i => (i, i.toString) }.toDF("age", "name") df.write.insertInto("students") spark.catalog.cacheTable("students") - assume(spark.table("students").collect().toSeq == df.collect().toSeq, "bad test: wrong data") + checkAnswer(spark.table("students"), df) assume(spark.catalog.isCached("students"), "bad test: table was not cached in the first place") sql("ALTER TABLE students RENAME TO teachers") sql("CREATE TABLE students (age INT, name STRING) USING parquet") @@ -767,7 +767,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(!spark.catalog.isCached("students")) assert(spark.catalog.isCached("teachers")) assert(spark.table("students").collect().isEmpty) - assert(spark.table("teachers").collect().toSeq == df.collect().toSeq) + checkAnswer(spark.table("teachers"), df) } test("rename temporary table - destination table with database name") { From 9876821603ec12e77ee58e8ef6f5841c9c310c93 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 8 Sep 2017 09:47:45 -0700 Subject: [PATCH 1175/1204] [SPARK-21128][R][BACKPORT-2.2] Remove both "spark-warehouse" and "metastore_db" before listing files in R tests ## What changes were proposed in this pull request? This PR proposes to list the files in test _after_ removing both "spark-warehouse" and "metastore_db" so that the next run of R tests pass fine. This is sometimes a bit annoying. ## How was this patch tested? Manually running multiple times R tests via `./R/run-tests.sh`. **Before** Second run: ``` SparkSQL functions: Spark package found in SPARK_HOME: .../spark ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................................................... ....................................................................................................1234....................... Failed ------------------------------------------------------------------------- 1. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3384) length(list1) not equal to length(list2). 1/1 mismatches [1] 25 - 23 == 2 2. 
Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3384) sort(list1, na.last = TRUE) not equal to sort(list2, na.last = TRUE). 10/25 mismatches x[16]: "metastore_db" y[16]: "pkg" x[17]: "pkg" y[17]: "R" x[18]: "R" y[18]: "README.md" x[19]: "README.md" y[19]: "run-tests.sh" x[20]: "run-tests.sh" y[20]: "SparkR_2.2.0.tar.gz" x[21]: "metastore_db" y[21]: "pkg" x[22]: "pkg" y[22]: "R" x[23]: "R" y[23]: "README.md" x[24]: "README.md" y[24]: "run-tests.sh" x[25]: "run-tests.sh" y[25]: "SparkR_2.2.0.tar.gz" 3. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3388) length(list1) not equal to length(list2). 1/1 mismatches [1] 25 - 23 == 2 4. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3388) sort(list1, na.last = TRUE) not equal to sort(list2, na.last = TRUE). 10/25 mismatches x[16]: "metastore_db" y[16]: "pkg" x[17]: "pkg" y[17]: "R" x[18]: "R" y[18]: "README.md" x[19]: "README.md" y[19]: "run-tests.sh" x[20]: "run-tests.sh" y[20]: "SparkR_2.2.0.tar.gz" x[21]: "metastore_db" y[21]: "pkg" x[22]: "pkg" y[22]: "R" x[23]: "R" y[23]: "README.md" x[24]: "README.md" y[24]: "run-tests.sh" x[25]: "run-tests.sh" y[25]: "SparkR_2.2.0.tar.gz" DONE =========================================================================== ``` **After** Second run: ``` SparkSQL functions: Spark package found in SPARK_HOME: .../spark ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................................................... ............................................................................................................................... ``` Author: hyukjinkwon Closes #18335 from HyukjinKwon/SPARK-21128. Author: hyukjinkwon Closes #19166 from felixcheung/rbackport21128. 
--- R/pkg/tests/run-all.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index f00a610679752..0aefd8006caa4 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -30,10 +30,10 @@ if (.Platform$OS.type == "windows") { install.spark() sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R") -sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db") invisible(lapply(sparkRWhitelistSQLDirs, function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)})) +sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) sparkRTestMaster <- "local[1]" if (identical(Sys.getenv("NOT_CRAN"), "true")) { From 182478e030688b602bf95edfd82f700d6f5678d1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 9 Sep 2017 19:10:52 +0900 Subject: [PATCH 1176/1204] [SPARK-21954][SQL] JacksonUtils should verify MapType's value type instead of key type ## What changes were proposed in this pull request? `JacksonUtils.verifySchema` verifies if a data type can be converted to JSON. For `MapType`, it now verifies the key type. However, in `JacksonGenerator`, when converting a map to JSON, we only care about its values and create a writer for the values. The keys in a map are treated as strings by calling `toString` on the keys. Thus, we should change `JacksonUtils.verifySchema` to verify the value type of `MapType`. ## How was this patch tested? Added tests. Author: Liang-Chi Hsieh Closes #19167 from viirya/test-jacksonutils. (cherry picked from commit 6b45d7e941eba8a36be26116787322d9e3ae25d0) Signed-off-by: hyukjinkwon --- .../sql/catalyst/json/JacksonUtils.scala | 4 +++- .../expressions/JsonExpressionsSuite.scala | 23 ++++++++++++++++++ .../apache/spark/sql/JsonFunctionsSuite.scala | 24 ++++++++++++++++--- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala index 3b23c6cd2816f..134d16e981a15 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala @@ -44,7 +44,9 @@ object JacksonUtils { case at: ArrayType => verifyType(name, at.elementType) - case mt: MapType => verifyType(name, mt.keyType) + // For MapType, its keys are treated as a string (i.e. calling `toString`) basically when + // generating JSON, so we only care if the values are valid for JSON. 
+ case mt: MapType => verifyType(name, mt.valueType) case udt: UserDefinedType[_] => verifyType(name, udt.sqlType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index f892e80204603..53b54de606930 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -21,6 +21,7 @@ import java.util.Calendar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -590,4 +591,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { """{"t":"2015-12-31T16:00:00"}""" ) } + + test("to_json: verify MapType's value type instead of key type") { + // Keys in map are treated as strings when converting to JSON. The type doesn't matter at all. + val mapType1 = MapType(CalendarIntervalType, IntegerType) + val schema1 = StructType(StructField("a", mapType1) :: Nil) + val struct1 = Literal.create(null, schema1) + checkEvaluation( + StructsToJson(Map.empty, struct1, gmtId), + null + ) + + // The value type must be valid for converting to JSON. + val mapType2 = MapType(IntegerType, CalendarIntervalType) + val schema2 = StructType(StructField("a", mapType2) :: Nil) + val struct2 = Literal.create(null, schema2) + intercept[TreeNodeException[_]] { + checkEvaluation( + StructsToJson(Map.empty, struct2, gmtId), + null + ) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 69a500c845a7b..989f8c23a4069 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.sql.functions.{from_json, struct, to_json} +import org.apache.spark.sql.functions.{from_json, lit, map, struct, to_json} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -188,15 +188,33 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { Row("""{"_1":"26/08/2015 18:00"}""") :: Nil) } - test("to_json unsupported type") { + test("to_json - key types of map don't matter") { + // interval type is invalid for converting to JSON. However, the keys of a map are treated + // as strings, so its type doesn't matter. 
val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a") - .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c")) + .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c")) + checkAnswer( + df.select(to_json($"c")), + Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil) + } + + test("to_json unsupported type") { + val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a") + val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c")) val e = intercept[AnalysisException]{ // Unsupported type throws an exception df.select(to_json($"c")).collect() } assert(e.getMessage.contains( "Unable to convert column a of type calendarinterval to JSON.")) + + // interval type is invalid for converting to JSON. We can't use it as value type of a map. + val df2 = baseDf + .select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c")) + val e2 = intercept[AnalysisException] { + df2.select(to_json($"c")).collect() + } + assert(e2.getMessage.contains("Unable to convert column col1 of type calendarinterval to JSON")) } test("roundtrip in to_json and from_json - struct") { From b1b5a7fdc0f8fabfb235f0b31bde0f1bfb71591a Mon Sep 17 00:00:00 2001 From: Peter Szalai Date: Sun, 10 Sep 2017 17:47:45 +0900 Subject: [PATCH 1177/1204] [SPARK-20098][PYSPARK] dataType's typeName fix ## What changes were proposed in this pull request? `typeName` classmethod has been fixed by using type -> typeName map. ## How was this patch tested? local build Author: Peter Szalai Closes #17435 from szalai1/datatype-gettype-fix. (cherry picked from commit 520d92a191c3148498087d751aeeddd683055622) Signed-off-by: hyukjinkwon --- python/pyspark/sql/tests.py | 4 ++++ python/pyspark/sql/types.py | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index a100dc07a161e..39655a50dc80c 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -188,6 +188,10 @@ def test_empty_row(self): row = Row() self.assertEqual(len(row), 0) + def test_struct_field_type_name(self): + struct_field = StructField("a", IntegerType()) + self.assertRaises(TypeError, struct_field.typeName) + class SQLTests(ReusedPySparkTestCase): diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 26b54a7fb3709..d9206dd14ca2d 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -438,6 +438,11 @@ def toInternal(self, obj): def fromInternal(self, obj): return self.dataType.fromInternal(obj) + def typeName(self): + raise TypeError( + "StructField does not have typeName. " + "Use typeName on its type explicitly instead.") + class StructType(DataType): """Struct type, consisting of a list of :class:`StructField`. From 10c68366e5474f131f7ea294e6abee4e02fca9f3 Mon Sep 17 00:00:00 2001 From: FavioVazquez Date: Tue, 12 Sep 2017 10:33:35 +0100 Subject: [PATCH 1178/1204] [SPARK-21976][DOC] Fix wrong documentation for Mean Absolute Error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes were proposed in this pull request? Fixed wrong documentation for Mean Absolute Error. Even though the code is correct for the MAE: ```scala Since("1.2.0") def meanAbsoluteError: Double = { summary.normL1(1) / summary.count } ``` In the documentation the division by N is missing. ## How was this patch tested? All of spark tests were run. 
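As a quick sanity check of the corrected formula: for labels $\mathbf{y} = (1, 2, 3)$ and predictions $\hat{\mathbf{y}} = (1, 3, 5)$, $MAE = \frac{1}{3}(|1-1| + |2-3| + |3-5|) = 1$, whereas the formula without the $\frac{1}{N}$ factor gives 3, i.e. the total absolute error rather than its mean.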
Please review http://spark.apache.org/contributing.html before opening a pull request. Author: FavioVazquez Author: faviovazquez Author: Favio André Vázquez Closes #19190 from FavioVazquez/mae-fix. (cherry picked from commit e2ac2f1c71a0f8b03743d0d916dc0ef28482a393) Signed-off-by: Sean Owen --- docs/mllib-evaluation-metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md index ac82f43cfb79d..7f277543d2e9a 100644 --- a/docs/mllib-evaluation-metrics.md +++ b/docs/mllib-evaluation-metrics.md @@ -549,7 +549,7 @@ variable from a number of independent variables. Mean Absolute Error (MAE) - $MAE=\sum_{i=0}^{N-1} \left|\mathbf{y}_i - \hat{\mathbf{y}}_i\right|$ + $MAE=\frac{1}{N}\sum_{i=0}^{N-1} \left|\mathbf{y}_i - \hat{\mathbf{y}}_i\right|$ Coefficient of Determination $(R^2)$ From 63098dc3170bf4289091d97b7beb63dd0e2356c5 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 12 Sep 2017 15:07:04 +0100 Subject: [PATCH 1179/1204] [DOCS] Fix unreachable links in the document ## What changes were proposed in this pull request? Recently, I found two unreachable links in the document and fixed them. Because of small changes related to the document, I don't file this issue in JIRA but please suggest I should do it if you think it's needed. ## How was this patch tested? Tested manually. Author: Kousuke Saruta Closes #19195 from sarutak/fix-unreachable-link. (cherry picked from commit 957558235b7537c706c6ab4779655aa57838ebac) Signed-off-by: Sean Owen --- docs/building-spark.md | 2 +- docs/rdd-programming-guide.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/building-spark.md b/docs/building-spark.md index 777635a64f83c..14164f16c08d2 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -119,7 +119,7 @@ should run continuous compilation (i.e. wait for changes). However, this has not extensively. A couple of gotchas to note: * it only scans the paths `src/main` and `src/test` (see -[docs](http://scala-tools.org/mvnsites/maven-scala-plugin/usage_cc.html)), so it will only work +[docs](http://davidb.github.io/scala-maven-plugin/example_cc.html)), so it will only work from within certain submodules that have that structure. * you'll typically need to run `mvn install` from the project root for compilation within diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index 8e6c36b8dc226..e3a31b821fa31 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -604,7 +604,7 @@ before the `reduce`, which would cause `lineLengths` to be saved in memory after Spark's API relies heavily on passing functions in the driver program to run on the cluster. There are two recommended ways to do this: -* [Anonymous function syntax](http://docs.scala-lang.org/tutorials/tour/anonymous-function-syntax.html), +* [Anonymous function syntax](http://docs.scala-lang.org/tour/basics.html#functions), which can be used for short pieces of code. * Static methods in a global singleton object. For example, you can define `object MyFunctions` and then pass `MyFunctions.func1`, as follows: From b606dc177e177bdbf99e72638eb8baec12e9fb53 Mon Sep 17 00:00:00 2001 From: Zheng RuiFeng Date: Tue, 12 Sep 2017 11:37:05 -0700 Subject: [PATCH 1180/1204] [SPARK-18608][ML] Fix double caching ## What changes were proposed in this pull request? `df.rdd.getStorageLevel` => `df.storageLevel` using cmd `find . 
-name '*.scala' | xargs -i bash -c 'egrep -in "\.rdd\.getStorageLevel" {} && echo {}'` to make sure all algs involved in this issue are fixed. Previous discussion in other PRs: https://github.com/apache/spark/pull/19107, https://github.com/apache/spark/pull/17014 ## How was this patch tested? existing tests Author: Zheng RuiFeng Closes #19197 from zhengruifeng/double_caching. (cherry picked from commit c5f9b89dda40ffaa4622a7ba2b3d0605dbe815c0) Signed-off-by: Joseph K. Bradley --- .../apache/spark/ml/classification/LogisticRegression.scala | 2 +- .../scala/org/apache/spark/ml/classification/OneVsRest.scala | 4 ++-- .../main/scala/org/apache/spark/ml/clustering/KMeans.scala | 2 +- .../apache/spark/ml/regression/AFTSurvivalRegression.scala | 2 +- .../org/apache/spark/ml/regression/IsotonicRegression.scala | 2 +- .../org/apache/spark/ml/regression/LinearRegression.scala | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1de237309aeae..e7f99fccf6a22 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -483,7 +483,7 @@ class LogisticRegression @Since("1.2.0") ( } override protected[spark] def train(dataset: Dataset[_]): LogisticRegressionModel = { - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE train(dataset, handlePersistence) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index 05b8c3ab5456e..f3aff4c44e708 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -164,7 +164,7 @@ final class OneVsRestModel private[ml] ( val newDataset = dataset.withColumn(accColName, initUDF()) // persist if underlying dataset is not persistent. - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) { newDataset.persist(StorageLevel.MEMORY_AND_DISK) } @@ -347,7 +347,7 @@ final class OneVsRest @Since("1.4.0") ( } // persist if underlying dataset is not persistent. 
- val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) { multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index e02b532ca8a93..f2af7fe082b41 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -304,7 +304,7 @@ class KMeans @Since("1.5.0") ( override def fit(dataset: Dataset[_]): KMeansModel = { transformSchema(dataset.schema, logging = true) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE val instances: RDD[OldVector] = dataset.select(col($(featuresCol))).rdd.map { case Row(point: Vector) => OldVectors.fromML(point) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 16821f317760e..4b46c3831d75f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -213,7 +213,7 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S override def fit(dataset: Dataset[_]): AFTSurvivalRegressionModel = { transformSchema(dataset.schema, logging = true) val instances = extractAFTPoints(dataset) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val featuresSummarizer = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 529f66eadbcff..8faab52ea474b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -165,7 +165,7 @@ class IsotonicRegression @Since("1.5.0") (@Since("1.5.0") override val uid: Stri transformSchema(dataset.schema, logging = true) // Extract columns from data. If dataset is persisted, do not persist oldDataset. 
val instances = extractWeightedLabeledPoints(dataset) - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val instr = Instrumentation.create(this, dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index eaad54985229e..18fcedfdb7bba 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -236,7 +236,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String return lrModel } - val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + val handlePersistence = dataset.storageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val (featuresSummarizer, ySummarizer) = { From 30e7298bb086c2aeaa2620e8f8c038c9cf25d404 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Tue, 12 Sep 2017 11:57:57 -0700 Subject: [PATCH 1181/1204] parquet versioning --- .../sql/catalyst/catalog/SessionCatalog.scala | 4 ++- .../spark/sql/hive/HiveMetastoreCatalog.scala | 30 +++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index df8f9aae1e1fe..f1e650762ef17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -667,7 +667,9 @@ class SessionCatalog( SubqueryAlias(table, viewDef) }.getOrElse(throw new NoSuchTableException(db, table)) } else if (name.database.isDefined || !tempTables.contains(table)) { - val metadata = externalCatalog.getTable(db, table) + val tableNamePreprocessor = externalCatalog.getTableNamePreprocessor + val tableNameInMetastore = tableNamePreprocessor(table) + val metadata = externalCatalog.getTable(db, tableNameInMetastore).withTableName(table) if (metadata.tableType == CatalogTableType.VIEW) { val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) // The relation is a view, so we wrap the relation by: diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index e1fee9af420dc..f8b1f47341af5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -184,10 +184,16 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log }) } else { val rootPath = tablePath + val paths: Seq[Path] = + if (fileType != "parquet") { + Seq(rootPath) + } else { + selectParquetLocationDirectories(relation.tableMeta.identifier.table, Option(rootPath)) + } withTableCreationLock(tableIdentifier, { val cached = getCached( tableIdentifier, - Seq(rootPath), + paths, metastoreSchema, fileFormatClass, None) @@ -197,7 +203,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log LogicalRelation( DataSource( sparkSession = sparkSession, - paths = rootPath.toString :: Nil, + paths = 
paths.map(_.toString), userSpecifiedSchema = Option(dataSchema), // We don't support hive bucketed tables, only ones we write out. bucketSpec = None, @@ -222,6 +228,26 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log result.copy(output = newOutput) } + private[hive] def selectParquetLocationDirectories( + tableName: String, + locationOpt: Option[Path]): Seq[Path] = { + val hadoopConf = sparkSession.sparkContext.hadoopConfiguration + val paths: Option[Seq[Path]] = for { + selector <- sparkSession.sharedState.externalCatalog.findHadoopFileSelector + location <- locationOpt + fs = location.getFileSystem(hadoopConf) + selectedPaths <- selector.selectFiles(tableName, fs, location) + selectedDir = for { + selectedPath <- selectedPaths + if selectedPath + .getFileSystem(hadoopConf) + .isDirectory(selectedPath) + } yield selectedPath + if selectedDir.nonEmpty + } yield selectedDir + paths.getOrElse(Seq(locationOpt.orNull)) + } + private def inferIfNeeded( relation: HiveTableRelation, options: Map[String, String], From 7966c84e4edf76eef6e7cc7d3c93ad77708858d3 Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Tue, 12 Sep 2017 14:01:27 -0700 Subject: [PATCH 1182/1204] style fix --- .../org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index f8b1f47341af5..a87aa4877a128 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -229,8 +229,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } private[hive] def selectParquetLocationDirectories( - tableName: String, - locationOpt: Option[Path]): Seq[Path] = { + tableName: String, + locationOpt: Option[Path]): Seq[Path] = { val hadoopConf = sparkSession.sparkContext.hadoopConfiguration val paths: Option[Seq[Path]] = for { selector <- sparkSession.sharedState.externalCatalog.findHadoopFileSelector From 3a692e355a786260c4a9c2ef210fe14e409af37a Mon Sep 17 00:00:00 2001 From: donnyzone Date: Wed, 13 Sep 2017 10:06:53 -0700 Subject: [PATCH 1183/1204] [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals ## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-21980 This PR fixes the issue in ResolveGroupingAnalytics rule, which indexes the column references in grouping functions without considering case sensitive configurations. The problem can be reproduced by: `val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b") df.cube("a").agg(grouping("A")).show()` ## How was this patch tested? unit tests Author: donnyzone Closes #19202 from DonnyZone/ResolveGroupingAnalytics. 
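To see why plain equality is too strict here, the hand-built pair below mimics two references to the same underlying column whose names differ only in case, the situation that arises under the default case-insensitive resolution. It is an illustration of the Catalyst behavior, not code from this patch, and assumes the Spark 2.x expression APIs used elsewhere in this series:

```scala
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId}
import org.apache.spark.sql.types.IntegerType

// Same ExprId (same underlying column), different name capitalization.
val lower = AttributeReference("a", IntegerType)(exprId = ExprId(1))
val upper = AttributeReference("A", IntegerType)(exprId = ExprId(1))

assert(!(lower == upper))           // equality compares the name string verbatim
assert(lower.semanticEquals(upper)) // canonicalization keys on the ExprId, not the name

// Hence Seq(lower).indexOf(upper) returns -1, while
// Seq(lower).indexWhere(_.semanticEquals(upper)) returns 0, which is what the fix relies on.
```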
(cherry picked from commit 21c4450fb24635fab6481a3756fefa9c6f6d6235) Signed-off-by: gatorsmile --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/DataFrameAggregateSuite.scala | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 50c82f5d8f2ed..c970c2080b11f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -315,7 +315,7 @@ class Analyzer( s"grouping columns (${groupByExprs.mkString(",")})") } case e @ Grouping(col: Expression) => - val idx = groupByExprs.indexOf(col) + val idx = groupByExprs.indexWhere(_.semanticEquals(col)) if (idx >= 0) { Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), Literal(1)), ByteType), toPrettySQL(e))() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 5f65512b17855..f50c0cfcd00ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -186,6 +186,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { ) } + test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { + checkAnswer( + courseSales.cube("course", "year") + .agg(grouping("CouRse"), grouping("year")), + Row("Java", 2012, 0, 0) :: + Row("Java", 2013, 0, 0) :: + Row("Java", null, 0, 1) :: + Row("dotNET", 2012, 0, 0) :: + Row("dotNET", 2013, 0, 0) :: + Row("dotNET", null, 0, 1) :: + Row(null, 2012, 1, 0) :: + Row(null, 2013, 1, 0) :: + Row(null, null, 1, 1) :: Nil + ) + } + test("rollup overlapping columns") { checkAnswer( testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"), From 51e5a821dcaa1d5f529afafc88cb8cfb4ad48e09 Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Thu, 14 Sep 2017 14:09:44 +0800 Subject: [PATCH 1184/1204] [SPARK-18608][ML][FOLLOWUP] Fix double caching for PySpark OneVsRest. ## What changes were proposed in this pull request? #19197 fixed double caching for MLlib algorithms, but missed PySpark ```OneVsRest```, this PR fixed it. ## How was this patch tested? Existing tests. Author: Yanbo Liang Closes #19220 from yanboliang/SPARK-18608. (cherry picked from commit c76153cc7dd25b8de5266fe119095066be7f78f5) Signed-off-by: Yanbo Liang --- python/pyspark/ml/classification.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 80bb054a00456..ea6800a823023 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -1576,8 +1576,7 @@ def _fit(self, dataset): multiclassLabeled = dataset.select(labelCol, featuresCol) # persist if underlying dataset is not persistent. 
- handlePersistence = \ - dataset.rdd.getStorageLevel() == StorageLevel(False, False, False, False) + handlePersistence = dataset.storageLevel == StorageLevel(False, False, False, False) if handlePersistence: multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK) @@ -1690,8 +1689,7 @@ def _transform(self, dataset): newDataset = dataset.withColumn(accColName, initUDF(dataset[origCols[0]])) # persist if underlying dataset is not persistent. - handlePersistence = \ - dataset.rdd.getStorageLevel() == StorageLevel(False, False, False, False) + handlePersistence = dataset.storageLevel == StorageLevel(False, False, False, False) if handlePersistence: newDataset.persist(StorageLevel.MEMORY_AND_DISK) From 42852bb17121fb8067a4aea3e56d56f76a2e0d1d Mon Sep 17 00:00:00 2001 From: Andrew Ray Date: Mon, 18 Sep 2017 02:46:27 +0900 Subject: [PATCH 1185/1204] [SPARK-21985][PYSPARK] PairDeserializer is broken for double-zipped RDDs ## What changes were proposed in this pull request? (edited) Fixes a bug introduced in #16121 In PairDeserializer convert each batch of keys and values to lists (if they do not have `__len__` already) so that we can check that they are the same size. Normally they already are lists so this should not have a performance impact, but this is needed when repeated `zip`'s are done. ## How was this patch tested? Additional unit test Author: Andrew Ray Closes #19226 from aray/SPARK-21985. (cherry picked from commit 6adf67dd14b0ece342bb91adf800df0a7101e038) Signed-off-by: hyukjinkwon --- python/pyspark/serializers.py | 6 +++++- python/pyspark/tests.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index ea5e00e9eeef5..9bd4e557aec92 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -97,7 +97,7 @@ def load_stream(self, stream): def _load_stream_without_unbatching(self, stream): """ - Return an iterator of deserialized batches (lists) of objects from the input stream. + Return an iterator of deserialized batches (iterable) of objects from the input stream. if the serializer does not operate on batches the default implementation returns an iterator of single element lists. """ @@ -326,6 +326,10 @@ def _load_stream_without_unbatching(self, stream): key_batch_stream = self.key_ser._load_stream_without_unbatching(stream) val_batch_stream = self.val_ser._load_stream_without_unbatching(stream) for (key_batch, val_batch) in zip(key_batch_stream, val_batch_stream): + # For double-zipped RDDs, the batches can be iterators from other PairDeserializer, + # instead of lists. We need to convert them to lists if needed. 
+ key_batch = key_batch if hasattr(key_batch, '__len__') else list(key_batch) + val_batch = val_batch if hasattr(val_batch, '__len__') else list(val_batch) if len(key_batch) != len(val_batch): raise ValueError("Can not deserialize PairRDD with different number of items" " in batches: (%d, %d)" % (len(key_batch), len(val_batch))) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 20a933ea6aa65..9f4779852679b 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -644,6 +644,18 @@ def test_cartesian_chaining(self): set([(x, (y, y)) for x in range(10) for y in range(10)]) ) + def test_zip_chaining(self): + # Tests for SPARK-21985 + rdd = self.sc.parallelize('abc', 2) + self.assertSetEqual( + set(rdd.zip(rdd).zip(rdd).collect()), + set([((x, x), x) for x in 'abc']) + ) + self.assertSetEqual( + set(rdd.zip(rdd.zip(rdd)).collect()), + set([(x, (x, x)) for x in 'abc']) + ) + def test_deleting_input_files(self): # Regression test for SPARK-1025 tempFile = tempfile.NamedTemporaryFile(delete=False) From 309c401a5b3c76cc1b6b5aef97d03034fe4e1ce4 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Mon, 18 Sep 2017 10:42:24 +0800 Subject: [PATCH 1186/1204] [SPARK-21953] Show both memory and disk bytes spilled if either is present As written now, there must be both memory and disk bytes spilled to show either of them. If there is only one of those types of spill recorded, it will be hidden. Author: Andrew Ash Closes #19164 from ash211/patch-3. (cherry picked from commit 6308c65f08b507408033da1f1658144ea8c1491f) Signed-off-by: Wenchen Fan --- core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala index 8bedd071a2c1f..25aa5042e0e07 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala @@ -108,7 +108,7 @@ private[spark] object UIData { def hasOutput: Boolean = outputBytes > 0 def hasShuffleRead: Boolean = shuffleReadTotalBytes > 0 def hasShuffleWrite: Boolean = shuffleWriteBytes > 0 - def hasBytesSpilled: Boolean = memoryBytesSpilled > 0 && diskBytesSpilled > 0 + def hasBytesSpilled: Boolean = memoryBytesSpilled > 0 || diskBytesSpilled > 0 } /** From a86831d618b05c789c2cea0afe5488c3234a14bc Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 18 Sep 2017 13:20:11 +0900 Subject: [PATCH 1187/1204] [SPARK-22043][PYTHON] Improves error message for show_profiles and dump_profiles ## What changes were proposed in this pull request? 
This PR proposes to improve error message from: ``` >>> sc.show_profiles() Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1000, in show_profiles self.profiler_collector.show_profiles() AttributeError: 'NoneType' object has no attribute 'show_profiles' >>> sc.dump_profiles("/tmp/abc") Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1005, in dump_profiles self.profiler_collector.dump_profiles(path) AttributeError: 'NoneType' object has no attribute 'dump_profiles' ``` to ``` >>> sc.show_profiles() Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1003, in show_profiles raise RuntimeError("'spark.python.profile' configuration must be set " RuntimeError: 'spark.python.profile' configuration must be set to 'true' to enable Python profile. >>> sc.dump_profiles("/tmp/abc") Traceback (most recent call last): File "", line 1, in File ".../spark/python/pyspark/context.py", line 1012, in dump_profiles raise RuntimeError("'spark.python.profile' configuration must be set " RuntimeError: 'spark.python.profile' configuration must be set to 'true' to enable Python profile. ``` ## How was this patch tested? Unit tests added in `python/pyspark/tests.py` and manual tests. Author: hyukjinkwon Closes #19260 from HyukjinKwon/profile-errors. (cherry picked from commit 7c7266208a3be984ac1ce53747dc0c3640f4ecac) Signed-off-by: hyukjinkwon --- python/pyspark/context.py | 12 ++++++++++-- python/pyspark/tests.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 49be76ea21952..ea58b3a93899e 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -994,12 +994,20 @@ def runJob(self, rdd, partitionFunc, partitions=None, allowLocal=False): def show_profiles(self): """ Print the profile stats to stdout """ - self.profiler_collector.show_profiles() + if self.profiler_collector is not None: + self.profiler_collector.show_profiles() + else: + raise RuntimeError("'spark.python.profile' configuration must be set " + "to 'true' to enable Python profile.") def dump_profiles(self, path): """ Dump the profile stats into directory `path` """ - self.profiler_collector.dump_profiles(path) + if self.profiler_collector is not None: + self.profiler_collector.dump_profiles(path) + else: + raise RuntimeError("'spark.python.profile' configuration must be set " + "to 'true' to enable Python profile.") def getConf(self): conf = SparkConf() diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 9f4779852679b..6a96aaf633d9a 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -1288,6 +1288,22 @@ def heavy_foo(x): rdd.foreach(heavy_foo) +class ProfilerTests2(unittest.TestCase): + def test_profiler_disabled(self): + sc = SparkContext(conf=SparkConf().set("spark.python.profile", "false")) + try: + self.assertRaisesRegexp( + RuntimeError, + "'spark.python.profile' configuration must be set", + lambda: sc.show_profiles()) + self.assertRaisesRegexp( + RuntimeError, + "'spark.python.profile' configuration must be set", + lambda: sc.dump_profiles("/tmp/abc")) + finally: + sc.stop() + + class InputFormatTests(ReusedPySparkTestCase): @classmethod From 48d6aef99f2568b8038682ff79006bb6db9af638 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 18 Sep 2017 16:42:08 +0800 Subject: [PATCH 1188/1204] [SPARK-22047][TEST] ignore 
HiveExternalCatalogVersionsSuite ## What changes were proposed in this pull request? As reported in https://issues.apache.org/jira/browse/SPARK-22047 , HiveExternalCatalogVersionsSuite is failing frequently, let's disable this test suite to unblock other PRs, I'm looking into the root cause. ## How was this patch tested? N/A Author: Wenchen Fan Closes #19264 from cloud-fan/test. (cherry picked from commit 894a7561de2c2ff01fe7fcc5268378161e9e5643) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 2928a734a7e36..01db9eb6f04f2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -35,6 +35,7 @@ import org.apache.spark.util.Utils * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the * downloading for this spark version. */ +@org.scalatest.Ignore class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") From dfbc6a5224a9eb38d3c39b13098f7a4a8dd8ffec Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Mon, 18 Sep 2017 10:34:25 -0700 Subject: [PATCH 1189/1204] Parquet versioning --- .../spark/sql/hive/HiveMetastoreCatalog.scala | 4 +- .../sql/hive/HiveMetastoreCatalogSuite.scala | 86 +++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index f8b1f47341af5..a87aa4877a128 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -229,8 +229,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } private[hive] def selectParquetLocationDirectories( - tableName: String, - locationOpt: Option[Path]): Seq[Path] = { + tableName: String, + locationOpt: Option[Path]): Seq[Path] = { val hadoopConf = sparkSession.sparkContext.hadoopConfiguration val paths: Option[Seq[Path]] = for { selector <- sparkSession.sharedState.externalCatalog.findHadoopFileSelector diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index d8fd68b63d1eb..c58060754f793 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql.hive +import java.io.File + +import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.sql.{QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType @@ -182,3 +185,86 @@ class DataSourceWithHiveMetastoreCatalogSuite } } } + +class ParquetLocationSelectionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { + import 
org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.HadoopFileSelector + import org.apache.spark.sql.hive.test.TestHive + private val hmc = new HiveMetastoreCatalog(spark) + // ensuring temp directories + private val baseDir = { + val base = + File.createTempFile( + "selectParquetLocationDirectories", + "1", + TestHive.sparkSession.hiveFilesTemp) + base.delete() + base.mkdirs() + base + } + + test(s"With Selector selecting from ${baseDir.toString}") { + val fullpath = { (somewhere: String, sometable: String) => + s"${baseDir.toString}/$somewhere/$sometable" + } + spark.sharedState.externalCatalog.setHadoopFileSelector(new HadoopFileSelector() { + override def selectFiles( + sometable: String, + fs: FileSystem, + somewhere: Path): Option[Seq[Path]] = { + Some(Seq(new Path(fullpath(somewhere.toString, sometable)))) + } + }) + + // ensure directory existence for somewhere/sometable + val somewhereSometable = new File(fullpath("somewhere", "sometable")) + somewhereSometable.mkdirs() + // somewhere/sometable is a directory => will be selected + assertResult(Seq(new Path(fullpath("somewhere", "sometable")))) { + hmc.selectParquetLocationDirectories("sometable", Option(new Path("somewhere"))) + } + + // ensure file existence for somewhere/sometable + somewhereSometable.delete() + somewhereSometable.createNewFile() + // somewhere/sometable is a file => will not be selected + assertResult(Seq(new Path("somewhere"))) { + hmc.selectParquetLocationDirectories("otherplace", Option(new Path("somewhere"))) + } + + // no location specified, none selected + assertResult(Seq(null)) { + hmc.selectParquetLocationDirectories("sometable", Option(null)) + } + } + + test("With Selector selecting None") { + spark.sharedState.externalCatalog.setHadoopFileSelector(new HadoopFileSelector() { + override def selectFiles( + tableName: String, + fs: FileSystem, + basePath: Path): Option[Seq[Path]] = None + }) + + // none selected + assertResult(Seq(new Path("somewhere"))) { + hmc.selectParquetLocationDirectories("sometable", Option(new Path("somewhere"))) + } + // none selected + assertResult(Seq(null)) { + hmc.selectParquetLocationDirectories("sometable", Option(null)) + } + } + + test("Without Selector") { + spark.sharedState.externalCatalog.unsetHadoopFileSelector() + + // none selected + assertResult(Seq(new Path("somewhere"))) { + hmc.selectParquetLocationDirectories("sometable", Option(new Path("somewhere"))) + } + // none selected + assertResult(Seq(null)) { + hmc.selectParquetLocationDirectories("sometable", Option(null)) + } + } +} \ No newline at end of file From d0234ebcf84acf0ead49ae2ea66ceada9995b80b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 19 Sep 2017 11:53:50 +0800 Subject: [PATCH 1190/1204] [SPARK-22047][FLAKY TEST] HiveExternalCatalogVersionsSuite ## What changes were proposed in this pull request? This PR tries to download Spark for each test run, to make sure each test run is absolutely isolated. ## How was this patch tested? N/A Author: Wenchen Fan Closes #19265 from cloud-fan/test. 
(cherry picked from commit 10f45b3c84ff7b3f1765dc6384a563c33d26548b) Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 01db9eb6f04f2..305f5b533d592 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -35,16 +35,18 @@ import org.apache.spark.util.Utils * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the * downloading for this spark version. */ -@org.scalatest.Ignore class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") - private val sparkTestingDir = "/tmp/spark-test" + // For local test, you can set `sparkTestingDir` to a static value like `/tmp/test-spark`, to + // avoid downloading Spark of different versions in each run. + private val sparkTestingDir = Utils.createTempDir(namePrefix = "test-spark") private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) override def afterAll(): Unit = { Utils.deleteRecursively(wareHousePath) Utils.deleteRecursively(tmpDataDir) + Utils.deleteRecursively(sparkTestingDir) super.afterAll() } @@ -53,7 +55,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { val url = s"https://d3kbcqa49mib13.cloudfront.net/spark-$version-bin-hadoop2.7.tgz" - Seq("wget", url, "-q", "-P", sparkTestingDir).! + Seq("wget", url, "-q", "-P", sparkTestingDir.getCanonicalPath).! val downloaded = new File(sparkTestingDir, s"spark-$version-bin-hadoop2.7.tgz").getCanonicalPath val targetDir = new File(sparkTestingDir, s"spark-$version").getCanonicalPath From 6764408f68495e2ca7c1b9959db53ee12cabb197 Mon Sep 17 00:00:00 2001 From: Taaffy <32072374+Taaffy@users.noreply.github.com> Date: Tue, 19 Sep 2017 10:20:04 +0100 Subject: [PATCH 1191/1204] [SPARK-22052] Incorrect Metric assigned in MetricsReporter.scala Current implementation for processingRate-total uses wrong metric: mistakenly uses inputRowsPerSecond instead of processedRowsPerSecond ## What changes were proposed in this pull request? Adjust processingRate-total from using inputRowsPerSecond to processedRowsPerSecond ## How was this patch tested? Built spark from source with proposed change and tested output with correct parameter. Before change the csv metrics file for inputRate-total and processingRate-total displayed the same values due to the error. After changing MetricsReporter.scala the processingRate-total csv file displayed the correct metric. processed rows per second Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Taaffy <32072374+Taaffy@users.noreply.github.com> Closes #19268 from Taaffy/patch-1. 
(cherry picked from commit 1bc17a6b8add02772a8a0a1048ac6a01d045baf4) Signed-off-by: Sean Owen --- .../apache/spark/sql/execution/streaming/MetricsReporter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala index 5551d12fa8ad2..b84e6ce64c611 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetricsReporter.scala @@ -40,7 +40,7 @@ class MetricsReporter( // Metric names should not have . in them, so that all the metrics of a query are identified // together in Ganglia as a single metric group registerGauge("inputRate-total", () => stream.lastProgress.inputRowsPerSecond) - registerGauge("processingRate-total", () => stream.lastProgress.inputRowsPerSecond) + registerGauge("processingRate-total", () => stream.lastProgress.processedRowsPerSecond) registerGauge("latency", () => stream.lastProgress.durationMs.get("triggerExecution").longValue()) private def registerGauge[T](name: String, f: () => T)(implicit num: Numeric[T]): Unit = { From 5d10586a0065c6845e0e89afc5f22e09baa185b7 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 20 Sep 2017 09:00:43 -0700 Subject: [PATCH 1192/1204] [SPARK-22076][SQL] Expand.projections should not be a Stream ## What changes were proposed in this pull request? Spark with Scala 2.10 fails with a group by cube: ``` spark.range(1).select($"id" as "a", $"id" as "b").write.partitionBy("a").mode("overwrite").saveAsTable("rollup_bug") spark.sql("select 1 from rollup_bug group by rollup ()").show ``` It can be traced back to https://github.com/apache/spark/pull/15484 , which made `Expand.projections` a lazy `Stream` for group by cube. In scala 2.10 `Stream` captures a lot of stuff, and in this case it captures the entire query plan which has some un-serializable parts. This change is also good for master branch, to reduce the serialized size of `Expand.projections`. ## How was this patch tested? manually verified with Spark with Scala 2.10. Author: Wenchen Fan Closes #19289 from cloud-fan/bug. (cherry picked from commit ce6a71e013c403d0a3690cf823934530ce0ea5ef) Signed-off-by: gatorsmile --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c970c2080b11f..f707aa820ee57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -280,9 +280,15 @@ class Analyzer( * We need to get all of its subsets for a given GROUPBY expression, the subsets are * represented as sequence of expressions. */ - def cubeExprs(exprs: Seq[Expression]): Seq[Seq[Expression]] = exprs.toList match { + def cubeExprs(exprs: Seq[Expression]): Seq[Seq[Expression]] = { + // `cubeExprs0` is recursive and returns a lazy Stream. Here we call `toIndexedSeq` to + // materialize it and avoid serialization problems later on. 
+ cubeExprs0(exprs).toIndexedSeq + } + + def cubeExprs0(exprs: Seq[Expression]): Seq[Seq[Expression]] = exprs.toList match { case x :: xs => - val initial = cubeExprs(xs) + val initial = cubeExprs0(xs) initial.map(x +: _) ++ initial case Nil => Seq(Seq.empty) From 401ac20d22f46f4a0e279c8aa08ceb00d8bdc617 Mon Sep 17 00:00:00 2001 From: Devaraj K Date: Wed, 20 Sep 2017 16:22:36 -0700 Subject: [PATCH 1193/1204] [SPARK-21384][YARN] Spark + YARN fails with LocalFileSystem as default FS ## What changes were proposed in this pull request? When the libraries temp directory(i.e. __spark_libs__*.zip dir) file system and staging dir(destination) file systems are the same then the __spark_libs__*.zip is not copying to the staging directory. But after making this decision the libraries zip file is getting deleted immediately and becoming unavailable for the Node Manager's localization. With this change, client copies the files to remote always when the source scheme is "file". ## How was this patch tested? I have verified it manually in yarn/cluster and yarn/client modes with hdfs and local file systems. Author: Devaraj K Closes #19141 from devaraj-kavali/SPARK-21384. (cherry picked from commit 55d5fa79db883e4d93a9c102a94713c9d2d1fb55) Signed-off-by: Marcelo Vanzin --- .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 7e39c08763eb0..638ee0f86d06d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -337,8 +337,9 @@ private[spark] class Client( /** * Copy the given file to a remote file system (e.g. HDFS) if needed. - * The file is only copied if the source and destination file systems are different. This is used - * for preparing resources for launching the ApplicationMaster container. Exposed for testing. + * The file is only copied if the source and destination file systems are different or the source + * scheme is "file". This is used for preparing resources for launching the ApplicationMaster + * container. Exposed for testing. */ private[yarn] def copyFileToRemote( destDir: Path, @@ -350,7 +351,7 @@ private[spark] class Client( val destFs = destDir.getFileSystem(hadoopConf) val srcFs = srcPath.getFileSystem(hadoopConf) var destPath = srcPath - if (force || !compareFs(srcFs, destFs)) { + if (force || !compareFs(srcFs, destFs) || "file".equals(srcFs.getScheme)) { destPath = new Path(destDir, destName.getOrElse(srcPath.getName())) logInfo(s"Uploading resource $srcPath -> $destPath") FileUtil.copy(srcFs, srcPath, destFs, destPath, false, hadoopConf) From 765fd92e7bc2a03c69ed0eb1f19d51752be53504 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Thu, 21 Sep 2017 10:20:19 -0700 Subject: [PATCH 1194/1204] [SPARK-21928][CORE] Set classloader on SerializerManager's private kryo ## What changes were proposed in this pull request? We have to make sure that SerializerManager's private instance of kryo also uses the right classloader, regardless of the current thread classloader. In particular, this fixes serde during remote cache fetches, as those occur in netty threads. ## How was this patch tested? Manual tests & existing suite via jenkins. 
I haven't been able to reproduce this is in a unit test, because when a remote RDD partition can be fetched, there is a warning message and then the partition is just recomputed locally. I manually verified the warning message is no longer present. Author: Imran Rashid Closes #19280 from squito/SPARK-21928_ser_classloader. (cherry picked from commit b75bd1777496ce0354458bf85603a8087a6a0ff8) Signed-off-by: Marcelo Vanzin --- core/src/main/scala/org/apache/spark/executor/Executor.scala | 3 +++ .../scala/org/apache/spark/serializer/SerializerManager.scala | 4 ++++ .../test/scala/org/apache/spark/executor/ExecutorSuite.scala | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index e53d91d4e468b..24561b8da5738 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -130,6 +130,9 @@ private[spark] class Executor( // Set the classloader for serializer env.serializer.setDefaultClassLoader(replClassLoader) + // SPARK-21928. SerializerManager's internal instance of Kryo might get used in netty threads + // for fetching remote cached RDD blocks, so need to make sure it uses the right classloader too. + env.serializerManager.setDefaultClassLoader(replClassLoader) // Max size of direct result. If task result is bigger than this, we use the block manager // to send the result back. diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala index bb7ed8709ba8a..311383e7ea2bd 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala @@ -41,6 +41,10 @@ private[spark] class SerializerManager( private[this] val kryoSerializer = new KryoSerializer(conf) + def setDefaultClassLoader(classLoader: ClassLoader): Unit = { + kryoSerializer.setDefaultClassLoader(classLoader) + } + private[this] val stringClassTag: ClassTag[String] = implicitly[ClassTag[String]] private[this] val primitiveAndPrimitiveArrayClassTags: Set[ClassTag[_]] = { val primitiveClassTags = Set[ClassTag[_]]( diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index efcad140350b9..5bef2e2ff84a6 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -41,7 +41,7 @@ import org.apache.spark.metrics.MetricsSystem import org.apache.spark.rdd.RDD import org.apache.spark.rpc.RpcEnv import org.apache.spark.scheduler.{FakeTask, ResultTask, TaskDescription} -import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.serializer.{JavaSerializer, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.BlockManagerId import org.apache.spark.util.UninterruptibleThread @@ -233,6 +233,7 @@ class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSug val mockMemoryManager = mock[MemoryManager] when(mockEnv.conf).thenReturn(conf) when(mockEnv.serializer).thenReturn(serializer) + when(mockEnv.serializerManager).thenReturn(mock[SerializerManager]) when(mockEnv.rpcEnv).thenReturn(mockRpcEnv) when(mockEnv.metricsSystem).thenReturn(mockMetricsSystem) 
when(mockEnv.memoryManager).thenReturn(mockMemoryManager) From 090b987e665a47f08e2dc9fc5f22c427bc260fbc Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Thu, 21 Sep 2017 21:55:07 -0700 Subject: [PATCH 1195/1204] [SPARK-22094][SS] processAllAvailable should check the query state `processAllAvailable` should also check the query state and if the query is stopped, it should return. The new unit test. Author: Shixiong Zhu Closes #19314 from zsxwing/SPARK-22094. (cherry picked from commit fedf6961be4e99139eb7ab08d5e6e29187ea5ccf) Signed-off-by: Shixiong Zhu --- .../sql/execution/streaming/StreamExecution.scala | 2 +- .../spark/sql/streaming/StreamingQuerySuite.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 16db353eef54c..33f81d98ca593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -777,7 +777,7 @@ class StreamExecution( if (streamDeathCause != null) { throw streamDeathCause } - if (noNewData) { + if (noNewData || !isActive) { return } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index b69536ed37463..ee5af65cd71c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -613,6 +613,18 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi } } + test("processAllAvailable should not block forever when a query is stopped") { + val input = MemoryStream[Int] + input.addData(1) + val query = input.toDF().writeStream + .trigger(Trigger.Once()) + .format("console") + .start() + failAfter(streamingTimeout) { + query.processAllAvailable() + } + } + /** Create a streaming DF that only execute one batch in which it returns the given static DF */ private def createSingleTriggerStreamingDF(triggerDF: DataFrame): DataFrame = { require(!triggerDF.isStreaming) From de6274a585fdc2eb9252dc5d5688ce3f3e9e0c39 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 22 Sep 2017 00:14:57 -0700 Subject: [PATCH 1196/1204] [SPARK-22072][SPARK-22071][BUILD] Improve release build scripts ## What changes were proposed in this pull request? Check JDK version (with javac) and use SPARK_VERSION for publish-release ## How was this patch tested? Manually tried local build with wrong JDK / JAVA_HOME & built a local release (LFTP disabled) Author: Holden Karau Closes #19312 from holdenk/improve-release-scripts-r2. 
(cherry picked from commit 8f130ad40178e35fecb3f2ba4a61ad23e6a90e3d) Signed-off-by: Holden Karau --- dev/create-release/release-build.sh | 33 +++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index a72307a28ad7a..f93a96b01cf52 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -31,8 +31,8 @@ Top level targets are All other inputs are environment variables GIT_REF - Release tag or commit to build from -SPARK_VERSION - Release identifier used when publishing -SPARK_PACKAGE_VERSION - Release identifier in top level package directory +SPARK_VERSION - Version of Spark being built (e.g. 2.1.2) +SPARK_PACKAGE_VERSION - Release identifier in top level package directory (e.g. 2.1.2-rc1) REMOTE_PARENT_DIR - Parent in which to create doc or release builds. REMOTE_PARENT_MAX_LENGTH - If set, parent directory will be cleaned to only have this number of subdirectories (by deleting old ones). WARNING: This deletes data. @@ -95,6 +95,33 @@ if [ -z "$SPARK_VERSION" ]; then | grep -v INFO | grep -v WARNING | grep -v Download) fi +# Verify we have the right java version set +if [ -z "$JAVA_HOME" ]; then + echo "Please set JAVA_HOME." + exit 1 +fi + +java_version=$("${JAVA_HOME}"/bin/javac -version 2>&1 | cut -d " " -f 2) + +if [[ ! $SPARK_VERSION < "2.2." ]]; then + if [[ $java_version < "1.8." ]]; then + echo "Java version $java_version is less than required 1.8 for 2.2+" + echo "Please set JAVA_HOME correctly." + exit 1 + fi +else + if [[ $java_version > "1.7." ]]; then + if [ -z "$JAVA_7_HOME" ]; then + echo "Java version $java_version is higher than required 1.7 for pre-2.2" + echo "Please set JAVA_HOME correctly." + exit 1 + else + JAVA_HOME="$JAVA_7_HOME" + fi + fi +fi + + if [ -z "$SPARK_PACKAGE_VERSION" ]; then SPARK_PACKAGE_VERSION="${SPARK_VERSION}-$(date +%Y_%m_%d_%H_%M)-${git_hash}" fi @@ -318,6 +345,8 @@ if [[ "$1" == "publish-snapshot" ]]; then fi if [[ "$1" == "publish-release" ]]; then + SPARK_VERSION=$SPARK_PACKAGE_VERSION + cd spark # Publish Spark to Maven release repo echo "Publishing Spark checkout at '$GIT_REF' ($git_hash)" From c0a34a9fff0912b3f1ae508e43f1fae53a45afae Mon Sep 17 00:00:00 2001 From: Jakub Nowacki Date: Sat, 23 Sep 2017 21:04:10 +0900 Subject: [PATCH 1197/1204] [SPARK-18136] Fix SPARK_JARS_DIR for Python pip install on Windows ## What changes were proposed in this pull request? Fix for setup of `SPARK_JARS_DIR` on Windows as it looks for `%SPARK_HOME%\RELEASE` file instead of `%SPARK_HOME%\jars` as it should. RELEASE file is not included in the `pip` build of PySpark. ## How was this patch tested? Local install of PySpark on Anaconda 4.4.0 (Python 3.6.1). Author: Jakub Nowacki Closes #19310 from jsnowacki/master. (cherry picked from commit c11f24a94007bbaad0835645843e776507094071) Signed-off-by: hyukjinkwon --- bin/spark-class2.cmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index f6157f42843e8..a93fd2f0e54bc 100644 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -29,7 +29,7 @@ if "x%1"=="x" ( ) rem Find Spark jars. 
-if exist "%SPARK_HOME%\RELEASE" ( +if exist "%SPARK_HOME%\jars" ( set SPARK_JARS_DIR="%SPARK_HOME%\jars" ) else ( set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars" From 1a829df94a9cfee4395353b0f93fb5bcd628dce4 Mon Sep 17 00:00:00 2001 From: Ala Luszczak Date: Sat, 23 Sep 2017 16:09:47 +0200 Subject: [PATCH 1198/1204] [SPARK-22092] Reallocation in OffHeapColumnVector.reserveInternal corrupts struct and array data `OffHeapColumnVector.reserveInternal()` will only copy already inserted values during reallocation if `data != null`. In vectors containing arrays or structs this is incorrect, since there field `data` is not used at all. We need to check `nulls` instead. Adds new tests to `ColumnVectorSuite` that reproduce the errors. Author: Ala Luszczak Closes #19323 from ala/port-vector-realloc. --- .../vectorized/OffHeapColumnVector.java | 2 +- .../vectorized/ColumnVectorSuite.scala | 227 ++++++++++++++++++ 2 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index a7d3744d00e91..cda7f2fe23815 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -436,7 +436,7 @@ public void loadBytes(ColumnVector.Array array) { // Split out the slow path. @Override protected void reserveInternal(int newCapacity) { - int oldCapacity = (this.data == 0L) ? 0 : capacity; + int oldCapacity = (nulls == 0L) ? 0 : capacity; if (this.resultArray != null) { this.lengthData = Platform.reallocateMemory(lengthData, oldCapacity * 4, newCapacity * 4); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala new file mode 100644 index 0000000000000..19b93c9257212 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.vectorized + +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { + + var testVector: ColumnVector = _ + + private def allocate(capacity: Int, dt: DataType): ColumnVector = { + new OnHeapColumnVector(capacity, dt) + } + + override def afterEach(): Unit = { + testVector.close() + } + + test("boolean") { + testVector = allocate(10, BooleanType) + (0 until 10).foreach { i => + testVector.appendBoolean(i % 2 == 0) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getBoolean(i) === (i % 2 == 0)) + } + } + + test("byte") { + testVector = allocate(10, ByteType) + (0 until 10).foreach { i => + testVector.appendByte(i.toByte) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getByte(i) === (i.toByte)) + } + } + + test("short") { + testVector = allocate(10, ShortType) + (0 until 10).foreach { i => + testVector.appendShort(i.toShort) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getShort(i) === (i.toShort)) + } + } + + test("int") { + testVector = allocate(10, IntegerType) + (0 until 10).foreach { i => + testVector.appendInt(i) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getInt(i) === i) + } + } + + test("long") { + testVector = allocate(10, LongType) + (0 until 10).foreach { i => + testVector.appendLong(i) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getLong(i) === i) + } + } + + test("float") { + testVector = allocate(10, FloatType) + (0 until 10).foreach { i => + testVector.appendFloat(i.toFloat) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getFloat(i) === i.toFloat) + } + } + + test("double") { + testVector = allocate(10, DoubleType) + (0 until 10).foreach { i => + testVector.appendDouble(i.toDouble) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getDouble(i) === i.toDouble) + } + } + + test("string") { + testVector = allocate(10, StringType) + (0 until 10).map { i => + val utf8 = s"str$i".getBytes("utf8") + testVector.appendByteArray(utf8, 0, utf8.length) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + assert(array.getUTF8String(i) === UTF8String.fromString(s"str$i")) + } + } + + test("binary") { + testVector = allocate(10, BinaryType) + (0 until 10).map { i => + val utf8 = s"str$i".getBytes("utf8") + testVector.appendByteArray(utf8, 0, utf8.length) + } + + val array = new ColumnVector.Array(testVector) + + (0 until 10).foreach { i => + val utf8 = s"str$i".getBytes("utf8") + assert(array.getBinary(i) === utf8) + } + } + + test("array") { + val arrayType = ArrayType(IntegerType, true) + testVector = allocate(10, arrayType) + + val data = testVector.arrayData() + var i = 0 + while (i < 6) { + data.putInt(i, i) + i += 1 + } + + // Populate it with arrays [0], [1, 2], [], [3, 4, 5] + testVector.putArray(0, 0, 1) + testVector.putArray(1, 1, 2) + testVector.putArray(2, 3, 0) + testVector.putArray(3, 3, 3) + + val array = new ColumnVector.Array(testVector) + + assert(array.getArray(0).toIntArray() 
=== Array(0)) + assert(array.getArray(1).asInstanceOf[ArrayData].toIntArray() === Array(1, 2)) + assert(array.getArray(2).asInstanceOf[ArrayData].toIntArray() === Array.empty[Int]) + assert(array.getArray(3).asInstanceOf[ArrayData].toIntArray() === Array(3, 4, 5)) + } + + test("struct") { + val schema = new StructType().add("int", IntegerType).add("double", DoubleType) + testVector = allocate(10, schema) + val c1 = testVector.getChildColumn(0) + val c2 = testVector.getChildColumn(1) + c1.putInt(0, 123) + c2.putDouble(0, 3.45) + c1.putInt(1, 456) + c2.putDouble(1, 5.67) + + val array = new ColumnVector.Array(testVector) + + assert(array.getStruct(0, 2).asInstanceOf[ColumnarBatch.Row].getInt(0) === 123) + assert(array.getStruct(0, 2).asInstanceOf[ColumnarBatch.Row].getDouble(1) === 3.45) + assert(array.getStruct(1, 2).asInstanceOf[ColumnarBatch.Row].getInt(0) === 456) + assert(array.getStruct(1, 2).asInstanceOf[ColumnarBatch.Row].getDouble(1) === 5.67) + } + + test("[SPARK-22092] off-heap column vector reallocation corrupts array data") { + val arrayType = ArrayType(IntegerType, true) + testVector = new OffHeapColumnVector(8, arrayType) + + val data = testVector.arrayData() + (0 until 8).foreach(i => data.putInt(i, i)) + (0 until 8).foreach(i => testVector.putArray(i, i, 1)) + + // Increase vector's capacity and reallocate the data to new bigger buffers. + testVector.reserve(16) + + // Check that none of the values got lost/overwritten. + val array = new ColumnVector.Array(testVector) + (0 until 8).foreach { i => + assert(array.getArray(i).toIntArray() === Array(i)) + } + } + + test("[SPARK-22092] off-heap column vector reallocation corrupts struct nullability") { + val structType = new StructType().add("int", IntegerType).add("double", DoubleType) + testVector = new OffHeapColumnVector(8, structType) + (0 until 8).foreach(i => if (i % 2 == 0) testVector.putNull(i) else testVector.putNotNull(i)) + testVector.reserve(16) + (0 until 8).foreach(i => assert(testVector.isNullAt(i) == (i % 2 == 0))) + } +} From 211d81beb001a113e262d399fcacbd72f33ea0d9 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Sun, 24 Sep 2017 02:51:04 +0900 Subject: [PATCH 1199/1204] [SPARK-22109][SQL][BRANCH-2.2] Resolves type conflicts between strings and timestamps in partition column ## What changes were proposed in this pull request? This PR backports https://github.com/apache/spark/commit/04975a68b583a6175f93da52374108e5d4754d9a into branch-2.2. ## How was this patch tested? Unit tests in `ParquetPartitionDiscoverySuite`. Author: hyukjinkwon Closes #19333 from HyukjinKwon/SPARK-22109-backport-2.2. --- .../execution/datasources/PartitioningUtils.scala | 11 ++++++----- .../parquet/ParquetPartitionDiscoverySuite.scala | 12 ++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index f61c673baaa58..6f7438192dfe2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -138,7 +138,7 @@ object PartitioningUtils { "root directory of the table. 
If there are multiple root directories, " + "please load them separately and then union them.") - val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues) + val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues, timeZone) // Creates the StructType which represents the partition columns. val fields = { @@ -322,7 +322,8 @@ object PartitioningUtils { * }}} */ def resolvePartitions( - pathsWithPartitionValues: Seq[(Path, PartitionValues)]): Seq[PartitionValues] = { + pathsWithPartitionValues: Seq[(Path, PartitionValues)], + timeZone: TimeZone): Seq[PartitionValues] = { if (pathsWithPartitionValues.isEmpty) { Seq.empty } else { @@ -337,7 +338,7 @@ object PartitioningUtils { val values = pathsWithPartitionValues.map(_._2) val columnCount = values.head.columnNames.size val resolvedValues = (0 until columnCount).map { i => - resolveTypeConflicts(values.map(_.literals(i))) + resolveTypeConflicts(values.map(_.literals(i)), timeZone) } // Fills resolved literals back to each partition @@ -474,7 +475,7 @@ object PartitioningUtils { * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" * types. */ - private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { + private def resolveTypeConflicts(literals: Seq[Literal], timeZone: TimeZone): Seq[Literal] = { val desiredType = { val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) // Falls back to string if all values of this column are null or empty string @@ -482,7 +483,7 @@ object PartitioningUtils { } literals.map { case l @ Literal(_, dataType) => - Literal.create(Cast(l, desiredType).eval(), desiredType) + Literal.create(Cast(l, desiredType, Some(timeZone.getID)).eval(), desiredType) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index b4f3de9961209..7225693e50279 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1022,4 +1022,16 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha } } } + + test("SPARK-22109: Resolve type conflicts between strings and timestamps in partition column") { + val df = Seq( + (1, "2015-01-01 00:00:00"), + (2, "2014-01-01 00:00:00"), + (3, "blah")).toDF("i", "str") + + withTempPath { path => + df.write.format("parquet").partitionBy("str").save(path.getAbsolutePath) + checkAnswer(spark.read.load(path.getAbsolutePath), df) + } + } } From 8acce00acc343bc04a0f5af4ce4717b42c8938da Mon Sep 17 00:00:00 2001 From: John O'Leary Date: Mon, 25 Sep 2017 09:16:27 +0900 Subject: [PATCH 1200/1204] [SPARK-22107] Change as to alias in python quickstart ## What changes were proposed in this pull request? Updated docs so that a line of python in the quick start guide executes. Closes #19283 ## How was this patch tested? Existing tests. Author: John O'Leary Closes #19326 from jgoleary/issues/22107. 
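The old line cannot execute because `as` is a reserved word in Python, so `Column.as("word")` is a syntax error; `alias` is the equivalent Column method. A self-contained sketch of the corrected pipeline (illustrative only; it substitutes a small in-memory DataFrame for the text file used in the guide):

```
# Illustrative sketch only: the corrected word-count pipeline from the quick start,
# run against a tiny in-memory DataFrame instead of a text file.
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, split

spark = SparkSession.builder.master("local[2]").appName("alias-sketch").getOrCreate()

textFile = spark.createDataFrame([("a b c",), ("a b",), ("c",)], ["value"])

wordCounts = (textFile
              .select(explode(split(textFile.value, "\s+")).alias("word"))
              .groupBy("word")
              .count())

wordCounts.show()   # a=2, b=2, c=2
spark.stop()
```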
(cherry picked from commit 20adf9aa1f42353432d356117e655e799ea1290b) Signed-off-by: hyukjinkwon --- docs/quick-start.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index c4c5a5acb2dee..aac047f286e63 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -153,7 +153,7 @@ This first maps a line to an integer value and aliases it as "numWords", creatin One common data flow pattern is MapReduce, as popularized by Hadoop. Spark can implement MapReduce flows easily: {% highlight python %} ->>> wordCounts = textFile.select(explode(split(textFile.value, "\s+")).as("word")).groupBy("word").count() +>>> wordCounts = textFile.select(explode(split(textFile.value, "\s+")).alias("word")).groupBy("word").count() {% endhighlight %} Here, we use the `explode` function in `select`, to transfrom a Dataset of lines to a Dataset of words, and then combine `groupBy` and `count` to compute the per-word counts in the file as a DataFrame of 2 columns: "word" and "count". To collect the word counts in our shell, we can call `collect`: From 9836ea19fde10bb1ea2c206cbe753bda746e9922 Mon Sep 17 00:00:00 2001 From: Imran Rashid Date: Mon, 25 Sep 2017 12:02:30 -0700 Subject: [PATCH 1201/1204] [SPARK-22083][CORE] Release locks in MemoryStore.evictBlocksToFreeSpace ## What changes were proposed in this pull request? MemoryStore.evictBlocksToFreeSpace acquires write locks for all the blocks it intends to evict up front. If there is a failure to evict blocks (eg., some failure dropping a block to disk), then we have to release the lock. Otherwise the lock is never released and an executor trying to get the lock will wait forever. ## How was this patch tested? Added unit test. Author: Imran Rashid Closes #19311 from squito/SPARK-22083. (cherry picked from commit 2c5b9b1173c23f6ca8890817a9a35dc7557b0776) Signed-off-by: Marcelo Vanzin --- .../spark/storage/memory/MemoryStore.scala | 47 +++++-- .../spark/storage/MemoryStoreSuite.scala | 119 ++++++++++++++++++ 2 files changed, 153 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala index 90e3af2d0ec74..1b8b4db2e45de 100644 --- a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala @@ -31,7 +31,7 @@ import org.apache.spark.{SparkConf, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.memory.{MemoryManager, MemoryMode} import org.apache.spark.serializer.{SerializationStream, SerializerManager} -import org.apache.spark.storage.{BlockId, BlockInfoManager, StorageLevel, StreamBlockId} +import org.apache.spark.storage._ import org.apache.spark.unsafe.Platform import org.apache.spark.util.{SizeEstimator, Utils} import org.apache.spark.util.collection.SizeTrackingVector @@ -534,20 +534,38 @@ private[spark] class MemoryStore( } if (freedMemory >= space) { - logInfo(s"${selectedBlocks.size} blocks selected for dropping " + - s"(${Utils.bytesToString(freedMemory)} bytes)") - for (blockId <- selectedBlocks) { - val entry = entries.synchronized { entries.get(blockId) } - // This should never be null as only one task should be dropping - // blocks and removing entries. However the check is still here for - // future safety. 
- if (entry != null) { - dropBlock(blockId, entry) + var lastSuccessfulBlock = -1 + try { + logInfo(s"${selectedBlocks.size} blocks selected for dropping " + + s"(${Utils.bytesToString(freedMemory)} bytes)") + (0 until selectedBlocks.size).foreach { idx => + val blockId = selectedBlocks(idx) + val entry = entries.synchronized { + entries.get(blockId) + } + // This should never be null as only one task should be dropping + // blocks and removing entries. However the check is still here for + // future safety. + if (entry != null) { + dropBlock(blockId, entry) + afterDropAction(blockId) + } + lastSuccessfulBlock = idx + } + logInfo(s"After dropping ${selectedBlocks.size} blocks, " + + s"free memory is ${Utils.bytesToString(maxMemory - blocksMemoryUsed)}") + freedMemory + } finally { + // like BlockManager.doPut, we use a finally rather than a catch to avoid having to deal + // with InterruptedException + if (lastSuccessfulBlock != selectedBlocks.size - 1) { + // the blocks we didn't process successfully are still locked, so we have to unlock them + (lastSuccessfulBlock + 1 until selectedBlocks.size).foreach { idx => + val blockId = selectedBlocks(idx) + blockInfoManager.unlock(blockId) + } } } - logInfo(s"After dropping ${selectedBlocks.size} blocks, " + - s"free memory is ${Utils.bytesToString(maxMemory - blocksMemoryUsed)}") - freedMemory } else { blockId.foreach { id => logInfo(s"Will not store $id") @@ -560,6 +578,9 @@ private[spark] class MemoryStore( } } + // hook for testing, so we can simulate a race + protected def afterDropAction(blockId: BlockId): Unit = {} + def contains(blockId: BlockId): Boolean = { entries.synchronized { entries.containsKey(blockId) } } diff --git a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala index 9929ea033a99f..7274072e5049a 100644 --- a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala @@ -407,4 +407,123 @@ class MemoryStoreSuite }) assert(memoryStore.getSize(blockId) === 10000) } + + test("SPARK-22083: Release all locks in evictBlocksToFreeSpace") { + // Setup a memory store with many blocks cached, and then one request which leads to multiple + // blocks getting evicted. We'll make the eviction throw an exception, and make sure that + // all locks are released. 
+ val ct = implicitly[ClassTag[Array[Byte]]] + val numInitialBlocks = 10 + val memStoreSize = 100 + val bytesPerSmallBlock = memStoreSize / numInitialBlocks + def testFailureOnNthDrop(numValidBlocks: Int, readLockAfterDrop: Boolean): Unit = { + val tc = TaskContext.empty() + val memManager = new StaticMemoryManager(conf, Long.MaxValue, memStoreSize, numCores = 1) + val blockInfoManager = new BlockInfoManager + blockInfoManager.registerTask(tc.taskAttemptId) + var droppedSoFar = 0 + val blockEvictionHandler = new BlockEvictionHandler { + var memoryStore: MemoryStore = _ + + override private[storage] def dropFromMemory[T: ClassTag]( + blockId: BlockId, + data: () => Either[Array[T], ChunkedByteBuffer]): StorageLevel = { + if (droppedSoFar < numValidBlocks) { + droppedSoFar += 1 + memoryStore.remove(blockId) + if (readLockAfterDrop) { + // for testing purposes, we act like another thread gets the read lock on the new + // block + StorageLevel.DISK_ONLY + } else { + StorageLevel.NONE + } + } else { + throw new RuntimeException(s"Mock error dropping block $droppedSoFar") + } + } + } + val memoryStore = new MemoryStore(conf, blockInfoManager, serializerManager, memManager, + blockEvictionHandler) { + override def afterDropAction(blockId: BlockId): Unit = { + if (readLockAfterDrop) { + // pretend that we get a read lock on the block (now on disk) in another thread + TaskContext.setTaskContext(tc) + blockInfoManager.lockForReading(blockId) + TaskContext.unset() + } + } + } + + blockEvictionHandler.memoryStore = memoryStore + memManager.setMemoryStore(memoryStore) + + // Put in some small blocks to fill up the memory store + val initialBlocks = (1 to numInitialBlocks).map { id => + val blockId = BlockId(s"rdd_1_$id") + val blockInfo = new BlockInfo(StorageLevel.MEMORY_ONLY, ct, tellMaster = false) + val initialWriteLock = blockInfoManager.lockNewBlockForWriting(blockId, blockInfo) + assert(initialWriteLock) + val success = memoryStore.putBytes(blockId, bytesPerSmallBlock, MemoryMode.ON_HEAP, () => { + new ChunkedByteBuffer(ByteBuffer.allocate(bytesPerSmallBlock)) + }) + assert(success) + blockInfoManager.unlock(blockId, None) + } + assert(blockInfoManager.size === numInitialBlocks) + + + // Add one big block, which will require evicting everything in the memorystore. However our + // mock BlockEvictionHandler will throw an exception -- make sure all locks are cleared. 
+ val largeBlockId = BlockId(s"rdd_2_1") + val largeBlockInfo = new BlockInfo(StorageLevel.MEMORY_ONLY, ct, tellMaster = false) + val initialWriteLock = blockInfoManager.lockNewBlockForWriting(largeBlockId, largeBlockInfo) + assert(initialWriteLock) + if (numValidBlocks < numInitialBlocks) { + val exc = intercept[RuntimeException] { + memoryStore.putBytes(largeBlockId, memStoreSize, MemoryMode.ON_HEAP, () => { + new ChunkedByteBuffer(ByteBuffer.allocate(memStoreSize)) + }) + } + assert(exc.getMessage().startsWith("Mock error dropping block"), exc) + // BlockManager.doPut takes care of releasing the lock for the newly written block -- not + // testing that here, so do it manually + blockInfoManager.removeBlock(largeBlockId) + } else { + memoryStore.putBytes(largeBlockId, memStoreSize, MemoryMode.ON_HEAP, () => { + new ChunkedByteBuffer(ByteBuffer.allocate(memStoreSize)) + }) + // BlockManager.doPut takes care of releasing the lock for the newly written block -- not + // testing that here, so do it manually + blockInfoManager.unlock(largeBlockId) + } + + val largeBlockInMemory = if (numValidBlocks == numInitialBlocks) 1 else 0 + val expBlocks = numInitialBlocks + + (if (readLockAfterDrop) 0 else -numValidBlocks) + + largeBlockInMemory + assert(blockInfoManager.size === expBlocks) + + val blocksStillInMemory = blockInfoManager.entries.filter { case (id, info) => + assert(info.writerTask === BlockInfo.NO_WRITER, id) + // in this test, all the blocks in memory have no reader, but everything dropped to disk + // had another thread read the block. We shouldn't lose the other thread's reader lock. + if (memoryStore.contains(id)) { + assert(info.readerCount === 0, id) + true + } else { + assert(info.readerCount === 1, id) + false + } + } + assert(blocksStillInMemory.size === + (numInitialBlocks - numValidBlocks + largeBlockInMemory)) + } + + Seq(0, 3, numInitialBlocks).foreach { failAfterDropping => + Seq(true, false).foreach { readLockAfterDropping => + testFailureOnNthDrop(failAfterDropping, readLockAfterDropping) + } + } + } } From b0f30b56af0563186516147d9ef296b37f679192 Mon Sep 17 00:00:00 2001 From: Greg Owen Date: Mon, 25 Sep 2017 14:16:11 -0700 Subject: [PATCH 1202/1204] [SPARK-22120][SQL] TestHiveSparkSession.reset() should clean out Hive warehouse directory ## What changes were proposed in this pull request? During TestHiveSparkSession.reset(), which is called after each TestHiveSingleton suite, we now delete and recreate the Hive warehouse directory. ## How was this patch tested? Ran full suite of tests locally, verified that they pass. Author: Greg Owen Closes #19341 from GregOwen/SPARK-22120. 
(cherry picked from commit ce204780ee2434ff6bae50428ae37083835798d3) Signed-off-by: gatorsmile --- .../scala/org/apache/spark/sql/hive/test/TestHive.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index ee9ac21a738dc..4612cce80effd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive.test import java.io.File +import java.net.URI import java.util.{Set => JavaSet} import scala.collection.JavaConverters._ @@ -486,6 +487,11 @@ private[hive] class TestHiveSparkSession( } } + // Clean out the Hive warehouse between each suite + val warehouseDir = new File(new URI(sparkContext.conf.get("spark.sql.warehouse.dir")).getPath) + Utils.deleteRecursively(warehouseDir) + warehouseDir.mkdir() + sharedState.cacheManager.clearCache() loadedTables.clear() sessionState.catalog.reset() From a406473a525285888dbc29503443173df1d1c490 Mon Sep 17 00:00:00 2001 From: Wang Gengliang Date: Wed, 27 Sep 2017 17:40:31 +0200 Subject: [PATCH 1203/1204] [SPARK-22141][BACKPORT][SQL] Propagate empty relation before checking Cartesian products Back port https://github.com/apache/spark/pull/19362 to branch-2.2 ## What changes were proposed in this pull request? When inferring constraints from children, Join's condition can be simplified as None. For example, ``` val testRelation = LocalRelation('a.int) val x = testRelation.as("x") val y = testRelation.where($"a" === 2 && !($"a" === 2)).as("y") x.join.where($"x.a" === $"y.a") ``` The plan will become ``` Join Inner :- LocalRelation , [a#23] +- LocalRelation , [a#224] ``` And the Cartesian products check will throw exception for above plan. Propagate empty relation before checking Cartesian products, and the issue is resolved. ## How was this patch tested? Unit test Author: Wang Gengliang Closes #19366 from gengliangwang/branch-2.2. 
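A user-facing reproduction (illustrative only, assuming the default `spark.sql.crossJoin.enabled=false`), mirroring the new `JoinSuite` case below from the Python API: both inputs are provably empty, so with this change the join is collapsed to an empty relation before the Cartesian-product check runs, instead of failing analysis:

```
# Illustrative sketch only. Both inputs are empty because of contradictory filters.
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.master("local[2]").appName("empty-join-sketch").getOrCreate()

x = spark.range(10).where((col("id") == 2) & ~(col("id") == 2)).alias("x")
y = spark.range(10).where((col("id") == 1) & ~(col("id") == 1)).alias("y")

# With crossJoin.enabled left at its default of false, this join previously failed with
# an AnalysisException about a detected Cartesian product; it now returns no rows.
print(x.join(y).count())   # 0
spark.stop()
```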
--- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../src/test/scala/org/apache/spark/sql/JoinSuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f67daa55c04ff..71e03ee829710 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -113,8 +113,6 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) SimplifyCreateArrayOps, SimplifyCreateMapOps) ++ extendedOperatorOptimizationRules: _*) :: - Batch("Check Cartesian Products", Once, - CheckCartesianProducts(conf)) :: Batch("Join Reorder", Once, CostBasedJoinReorder(conf)) :: Batch("Decimal Optimizations", fixedPoint, @@ -125,6 +123,8 @@ abstract class Optimizer(sessionCatalog: SessionCatalog, conf: SQLConf) Batch("LocalRelation", fixedPoint, ConvertToLocalRelation, PropagateEmptyRelation) :: + Batch("Check Cartesian Products", Once, + CheckCartesianProducts(conf)) :: Batch("OptimizeCodegen", Once, OptimizeCodegen(conf)) :: Batch("RewriteSubquery", Once, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 95dc1478896f3..cdfd33dfb91a3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -200,6 +200,14 @@ class JoinSuite extends QueryTest with SharedSQLContext { Nil) } + test("SPARK-22141: Propagate empty relation before checking Cartesian products") { + Seq("inner", "left", "right", "left_outer", "right_outer", "full_outer").foreach { joinType => + val x = testData2.where($"a" === 2 && !($"a" === 2)).as("x") + val y = testData2.where($"a" === 1 && !($"a" === 1)).as("y") + checkAnswer(x.join(y, Seq.empty, joinType), Nil) + } + } + test("big inner join, 4 matches per row") { val bigData = testData.union(testData).union(testData).union(testData) val bigDataX = bigData.as("x") From ef02a07a634b4f4864a56e49a8a4db4584aa898b Mon Sep 17 00:00:00 2001 From: Ian Li Date: Tue, 26 Sep 2017 07:14:15 -0700 Subject: [PATCH 1204/1204] SPY-1429 --- assembly/pom.xml | 147 ++++++++++++++++++++++++ assembly/src/deb/control/control | 8 ++ assembly/src/main/assembly/assembly.xml | 9 ++ 3 files changed, 164 insertions(+) create mode 100644 assembly/src/deb/control/control diff --git a/assembly/pom.xml b/assembly/pom.xml index fb55acc5f33f0..f1433918995a6 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -34,6 +34,19 @@ assembly none package + + scala-${scala.binary.version}/jars + + + scala-${scala.binary.version} + spark-${project.version}-yarn-shuffle.jar + ${project.parent.basedir}/common/network-yarn/target/${shuffle.jar.dir}/${shuffle.jar.basename} + + + spark + /usr/share/spark + root + 755 @@ -226,5 +239,139 @@ provided + + + deb + + + org.apache.spark + spark-network-shuffle_${scala.binary.version} + ${project.version} + + + + + + maven-antrun-plugin + + + prepare-package + + run + + + + + NOTE: Debian packaging is deprecated and is scheduled to be removed in Spark 1.4. 
+ + + + + + + + org.codehaus.mojo + buildnumber-maven-plugin + 1.2 + + + validate + + create + + + 8 + + + + + + org.vafer + jdeb + 0.11 + + + package + + jdeb + + + ${project.build.directory}/${deb.pkg.name}_${project.version}-${buildNumber}_all.deb + false + gzip + + + ${basedir}/target/${spark.jar.dir} + directory + + perm + ${deb.user} + ${deb.user} + ${deb.install.path}/jars + + + + ${shuffle.jar} + file + + perm + ${deb.user} + ${deb.user} + ${deb.install.path}/yarn + + + + ${basedir}/../conf + directory + + perm + ${deb.user} + ${deb.user} + ${deb.install.path}/conf + ${deb.bin.filemode} + + + + ${basedir}/../bin + directory + + perm + ${deb.user} + ${deb.user} + ${deb.install.path}/bin + ${deb.bin.filemode} + + + + ${basedir}/../sbin + directory + + perm + ${deb.user} + ${deb.user} + ${deb.install.path}/sbin + ${deb.bin.filemode} + + + + ${basedir}/../python + directory + + perm + ${deb.user} + ${deb.user} + ${deb.install.path}/python + ${deb.bin.filemode} + + + + + + + + + + + diff --git a/assembly/src/deb/control/control b/assembly/src/deb/control/control new file mode 100644 index 0000000000000..a6b4471d485f4 --- /dev/null +++ b/assembly/src/deb/control/control @@ -0,0 +1,8 @@ +Package: [[deb.pkg.name]] +Version: [[version]]-[[buildNumber]] +Section: misc +Priority: extra +Architecture: all +Maintainer: Matei Zaharia +Description: [[name]] +Distribution: development diff --git a/assembly/src/main/assembly/assembly.xml b/assembly/src/main/assembly/assembly.xml index 009d4b92f406c..6058c653c99f4 100644 --- a/assembly/src/main/assembly/assembly.xml +++ b/assembly/src/main/assembly/assembly.xml @@ -64,6 +64,15 @@ ${spark.jar.basename} + + + ${project.parent.basedir}/common/network-yarn/target/${shuffle.jar.dir} + + + + ${shuffle.jar.basename} + +